{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e21622cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1、(15.0分)因为NumPy数组在数值运算方面的效率优于Python提供的list，\n",
    "# 所以灵活掌握NumPy中数组的创建和基础的运算是有必要的。\n",
    "# 请使用NumPy库，编写Python代码完成下列操作：\n",
    "# ①启动Jupyter Notebook创建一个Notebook。（2分）\n",
    "# ②创建一个数值从0至1，间隔为0.01的数组arr1。（2分）\n",
    "# ③创建一个包含101个服从正态分布的随机数的数组arr2。（注意：数组arr2为一维数组）（2分）\n",
    "# ④对数组arr1和数组arr2进行四则运算，并输出其结果。（四则运算包括加、减、乘、除运算）（2分）\n",
    "# ⑤对数组arr2进行简单的统计分析，并输出其结果。\n",
    "# （统计分析包括对数组进行升序排序、求和、求均值、求标准差和求最小值操作）。（5分）\n",
    "# ⑥将数组arr1和数组arr2存储为当前工作路径下的一个二进制格式的文件arr.npz。（2分）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "3c815803",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np \n",
    "arr1 = np.arange(0, 1.01, 0.01)\n",
    "arr2 = np.random.randn(101)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "e8034d9c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "两数组之和为：\n",
      " [-0.59570818 -1.20938474  0.54410282  0.0843843  -0.52917642  0.98608763\n",
      " -0.89690309  0.26103604  0.82441727 -1.41283173 -0.85502218  0.71199106\n",
      " -0.5649032   1.02119414 -0.2360591   0.61098147 -1.46136053 -0.35775862\n",
      " -0.63980967  0.58103246  2.06538097  1.89145697 -0.28082694  0.48276675\n",
      "  1.3275043  -0.2315986  -1.26081066  2.03848421 -0.04106137  0.92828911\n",
      "  0.90263329 -0.67302925 -0.80487084  0.87012829 -0.16034527  0.71268527\n",
      "  0.75160628  0.77192203  0.62302749 -0.02190261 -1.08923214  1.89584504\n",
      "  0.03407364  0.1021216   0.32736252  0.98810306 -0.58769269 -1.09518455\n",
      "  1.62006198 -1.42608249 -0.17913382  0.70673302  0.53947685  0.659125\n",
      " -1.32202331  0.99110039  1.31816999  1.15289147  1.15102027 -2.29717189\n",
      "  3.40745957  1.23875275  1.16881664  1.06210111  0.78911814  0.93297455\n",
      " -0.28104086  0.78408708  1.43141452  0.70700278  0.25852303  1.10780687\n",
      "  0.30612782 -0.2921981  -0.64857053  1.1244556  -0.04447044  2.20582425\n",
      " -1.13042364  0.78971127 -0.59363196  2.33574834  0.30719964  2.5416502\n",
      "  0.50431241  1.60864733  0.92506571 -0.23072479  1.77247572 -0.48410129\n",
      "  1.84845554  0.05676575  0.92065295  1.51924194  0.94649499  0.67532487\n",
      "  0.56764889  0.8071035   0.88908913 -0.1313589   2.48367726]\n",
      "两数组之差为：\n",
      " [ 0.59570818  1.22938474 -0.50410282 -0.0243843   0.60917642 -0.88608763\n",
      "  1.01690309 -0.12103604 -0.66441727  1.59283173  1.05502218 -0.49199106\n",
      "  0.8049032  -0.76119414  0.5160591  -0.31098147  1.78136053  0.69775862\n",
      "  0.99980967 -0.20103246 -1.66538097 -1.47145697  0.72082694 -0.02276675\n",
      " -0.8475043   0.7315986   1.78081066 -1.49848421  0.60106137 -0.34828911\n",
      " -0.30263329  1.29302925  1.44487084 -0.21012829  0.84034527 -0.01268527\n",
      " -0.03160628 -0.03192203  0.13697251  0.80190261  1.88923214 -1.07584504\n",
      "  0.80592636  0.7578784   0.55263748 -0.08810306  1.50769269  2.03518455\n",
      " -0.66006198  2.40608249  1.17913382  0.31326698  0.50052315  0.400875\n",
      "  2.40202331  0.10889961 -0.19816999 -0.01289147  0.00897973  3.47717189\n",
      " -2.20745957 -0.01875275  0.07118336  0.19789889  0.49088186  0.36702545\n",
      "  1.60104086  0.55591292 -0.07141452  0.67299722  1.14147697  0.31219313\n",
      "  1.13387218  1.7521981   2.12857053  0.3755444   1.56447044 -0.66582425\n",
      "  2.69042364  0.79028873  2.19363196 -0.71574834  1.33280036 -0.8816502\n",
      "  1.17568759  0.09135267  0.79493429  1.97072479 -0.01247572  2.26410129\n",
      " -0.04845554  1.76323425  0.91934705  0.34075806  0.93350501  1.22467513\n",
      "  1.35235111  1.1328965   1.07091087  2.1113589  -0.48367726]\n",
      "两数组之积为：\n",
      " [-0.00000000e+00 -1.21938474e-02  1.04820564e-02  1.63152904e-03\n",
      " -2.27670566e-02  4.68043816e-02 -5.74141854e-02  1.33725226e-02\n",
      "  5.95533813e-02 -1.35254856e-01 -9.55022180e-02  6.62190171e-02\n",
      " -8.21883839e-02  1.15855239e-01 -5.26482743e-02  6.91472209e-02\n",
      " -2.59417685e-01 -8.97189662e-02 -1.47565741e-01  7.42961680e-02\n",
      "  3.73076195e-01  3.53105963e-01 -1.10181928e-01  5.81363522e-02\n",
      "  2.61001033e-01 -1.20399649e-01 -3.95410773e-01  4.77490737e-01\n",
      " -8.98971837e-02  1.85103843e-01  1.80789987e-01 -3.04739066e-01\n",
      " -3.59958670e-01  1.78242334e-01 -1.70117392e-01  1.26939844e-01\n",
      "  1.40978262e-01  1.48711152e-01  9.23504448e-02 -1.60642020e-01\n",
      " -5.95692856e-01  6.09196466e-01 -1.62089073e-01 -1.40987712e-01\n",
      " -4.95604921e-02  2.42146379e-01 -4.81938639e-01 -7.35636738e-01\n",
      "  5.47229750e-01 -9.38880419e-01 -3.39566911e-01  1.00333840e-01\n",
      "  1.01279608e-02  6.84362501e-02 -1.00549259e+00  2.42605215e-01\n",
      "  4.24575195e-01  3.32248138e-01  3.31191759e-01 -1.70343142e+00\n",
      "  1.68447574e+00  3.83539177e-01  3.40266314e-01  2.72223701e-01\n",
      "  9.54356105e-02  1.83933459e-01 -6.21086969e-01  7.64383449e-02\n",
      "  5.10961874e-01  1.17319172e-02 -3.09033877e-01  2.82442877e-01\n",
      " -2.97987967e-01 -7.46204612e-01 -1.02754219e+00  2.80841704e-01\n",
      " -6.11397533e-01  1.10558467e+00 -1.49013044e+00 -2.28095553e-04\n",
      " -1.11490557e+00  1.23585616e+00 -4.20496292e-01  1.42066967e+00\n",
      " -2.81977574e-01  6.44850233e-01  5.59565120e-02 -9.57630565e-01\n",
      "  7.85378636e-01 -1.22295015e+00  8.53609985e-01 -7.76443163e-01\n",
      "  6.00715309e-04  5.47995006e-01  6.10529356e-03 -2.60941372e-01\n",
      " -3.76657065e-01 -1.58009609e-01 -8.90926496e-02 -1.11014531e+00\n",
      "  1.48367726e+00]\n",
      "两0数组之商为：\n",
      " [-0.00000000e+00 -8.20085712e-03  3.81604511e-02  5.51629778e-01\n",
      " -7.02769808e-02  5.34138026e-02 -6.27022743e-02  3.66423011e-01\n",
      "  1.07466610e-01 -5.98869441e-02 -1.04709610e-01  1.82726965e-01\n",
      " -1.75207241e-01  1.45871695e-01 -3.72281908e-01  3.25392687e-01\n",
      " -9.86825550e-02 -3.22116953e-01 -2.19563157e-01  4.85893162e-01\n",
      "  1.07216704e-01  1.24891689e-01 -4.39273491e-01  9.09929812e-01\n",
      "  2.20688782e-01 -5.19104504e-01 -1.70961452e-01  1.52673119e-01\n",
      " -8.72107410e-01  4.54339567e-01  4.97815180e-01 -3.15351757e-01\n",
      " -2.84477104e-01  6.10965966e-01 -6.79530755e-01  9.65024033e-01\n",
      "  9.19290662e-01  9.20576555e-01  1.56360914e+00 -9.46825746e-01\n",
      " -2.68594794e-01  2.75937254e-01 -1.08829051e+00 -1.31146181e+00\n",
      " -3.90633732e+00  8.36271023e-01 -4.39060044e-01 -3.00284079e-01\n",
      "  4.21029741e-01 -2.55730118e-01 -7.36231923e-01  2.59234571e+00\n",
      "  2.66983655e+01  4.10454985e+00 -2.90007111e-01  1.24688169e+00\n",
      "  7.38620634e-01  9.77883585e-01  1.01572576e+00 -2.04352225e-01\n",
      "  2.13716346e-01  9.70174684e-01  1.12970337e+00  1.45799208e+00\n",
      "  4.29189899e+00  2.29702634e+00 -7.01351054e-01  5.87270696e+00\n",
      "  9.04959887e-01  4.05816026e+01 -1.58558669e+00  1.78478567e+00\n",
      " -1.73966756e+00 -7.14147288e-01 -5.32922155e-01  2.00290766e+00\n",
      " -9.44720855e-01  5.36277334e-01 -4.08286406e-01 -2.73613401e+03\n",
      " -5.74039649e-01  5.30887027e-01 -1.59906285e+00  4.84912162e-01\n",
      " -2.50232665e+00  1.12041520e+00  1.32174071e+01 -7.90388306e-01\n",
      "  9.86021219e-01 -6.47696067e-01  9.48911112e-01 -1.06653009e+00\n",
      "  1.40898690e+03  1.57829905e+00  1.44726866e+02 -3.45863131e+00\n",
      " -2.44678803e+00 -5.95470115e+00 -1.07797894e+01 -8.82857398e-01\n",
      "  6.74001030e-01]\n"
     ]
    }
   ],
   "source": [
    "print('两数组之和为：\\n', arr1 + arr2) \n",
    "print('两数组之差为：\\n', arr1 - arr2) \n",
    "print('两数组之积为：\\n', arr1 * arr2) \n",
    "print('两0数组之商为：\\n', arr1 / arr2) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "8c311cc7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "排序后数组为： [-2.88717189e+00 -1.91608249e+00 -1.91042364e+00 -1.86202331e+00\n",
      " -1.62136053e+00 -1.56518455e+00 -1.52081066e+00 -1.50283173e+00\n",
      " -1.48923214e+00 -1.39363196e+00 -1.38857053e+00 -1.37410129e+00\n",
      " -1.21938474e+00 -1.12487084e+00 -1.12135890e+00 -1.10072479e+00\n",
      " -1.04769269e+00 -1.02219810e+00 -9.83029246e-01 -9.56903090e-01\n",
      " -9.55022180e-01 -9.41040862e-01 -8.53234245e-01 -8.19809672e-01\n",
      " -8.04470438e-01 -6.84903199e-01 -6.79133822e-01 -5.95708179e-01\n",
      " -5.69176415e-01 -5.27758624e-01 -5.12800356e-01 -5.00826944e-01\n",
      " -5.00345272e-01 -4.81598595e-01 -4.41476967e-01 -4.13872176e-01\n",
      " -4.11902614e-01 -3.92351109e-01 -3.85926364e-01 -3.76059102e-01\n",
      " -3.35687589e-01 -3.27878401e-01 -3.21061370e-01 -2.74675128e-01\n",
      " -1.62896504e-01 -1.12637482e-01 -9.09108669e-02 -2.88728548e-04\n",
      "  6.52951423e-04  6.49499315e-03  1.70027785e-02  1.94768477e-02\n",
      "  5.43843012e-02  6.50657116e-02  1.14087082e-01  1.29125000e-01\n",
      "  1.49118141e-01  1.91036037e-01  1.96733020e-01  2.43027486e-01\n",
      "  2.52766749e-01  2.82974553e-01  3.62685268e-01  3.74455605e-01\n",
      "  3.91032463e-01  3.91606284e-01  3.97806869e-01  4.01922033e-01\n",
      "  4.32101112e-01  4.41100391e-01  4.60981472e-01  5.24102819e-01\n",
      "  5.38103064e-01  5.40128286e-01  5.48816635e-01  5.71020274e-01\n",
      "  5.82891470e-01  5.89241942e-01  6.01991064e-01  6.02633291e-01\n",
      "  6.28752749e-01  6.38289114e-01  7.44417266e-01  7.51414521e-01\n",
      "  7.58169992e-01  7.58647333e-01  8.91194143e-01  8.92475723e-01\n",
      "  9.36087632e-01  9.48455539e-01  1.08750430e+00  1.14006198e+00\n",
      "  1.43582425e+00  1.48367726e+00  1.48584504e+00  1.52574834e+00\n",
      "  1.68145697e+00  1.71165020e+00  1.76848421e+00  1.86538097e+00\n",
      "  2.80745957e+00]\n",
      "数组的和为： -6.065477230582283\n",
      "数组的均值为： -0.06005423000576517\n",
      "数组的标准差为： 0.9773561123162057\n",
      "数组的最小值为： -2.887171889871751\n"
     ]
    }
   ],
   "source": [
    "print('排序后数组为：', np.sort(arr2)) \n",
    "print('数组的和为：', np.sum(arr2)) \n",
    "print('数组的均值为：', np.mean(arr2)) \n",
    "print('数组的标准差为：', np.std(arr2)) \n",
    "print('数组的最小值为：', np.min(arr2)) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "b3f22b18",
   "metadata": {},
   "outputs": [],
   "source": [
    "np.savez('arr.npz', arr1, arr2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e7a4a8f4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2、(20.0分)请编写Python代码，对鸢尾花数据完成下列操作：\n",
    "# ①读取数据文件iris.csv，储存为数据框iris，\n",
    "# 并将数据框的列名称从左至右依次修改为\n",
    "# “sepal_length”“sepal_width”“petal_length”“petal_width”“class”。（3分）\n",
    "# ②将数据框iris中“petal_length”列的第0行至第9行设置为缺失值。\n",
    "# （注意：在Python中，索引为0开始。）（3分）\n",
    "# ③将数据框iris中“petal_length”列的缺失值全部替换为1.0。（3分）\n",
    "# ④删除数据框iris中“class”列。（2分）\n",
    "# ⑤将数据框iris的前3行设置为缺失值。（2分）\n",
    "# ⑥删除数据框iris中存在缺失值的行。（2分）\n",
    "# 7重新设置数据框iris的行索引。（3分）\n",
    "# 8将数据框iris保存到当前工作路径下并命名为iris_new.csv。（2分）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "6344828f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd \n",
    "iris = pd.read_csv('iris.csv', header=None) \n",
    "iris.columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class'] "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "273057e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "iris.loc[0: 9, 'petal_length'] = None \n",
    "iris['petal_length'].fillna(1.0, inplace=True) \n",
    "del iris['class'] \n",
    "iris.iloc[0: 3, :] = None\n",
    "iris.dropna(how='any', inplace=True) \n",
    "iris.reset_index(drop=True, inplace=True) \n",
    "iris.to_csv('iris_new.csv') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "085ac6f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3、(25.0分)葡萄酒是以葡萄为原料酿造的一种果酒。葡萄酒的品种很多，因葡萄的栽培、\n",
    "# 葡萄酒生产工艺条件的不同，产品风格各不相同。wine数据集包含3种不同的葡萄酒的记录共178条。\n",
    "# 其中，每个特征对应葡萄酒的每种化学成分，并且都属于连续型数据。\n",
    "# 通过对wine数据集的数据进行聚类，实现葡萄酒的类别划分。\n",
    "# 请依据wine数据集，编写Python代码完成下列操作：\n",
    "# 1.读取数据文件wine.csv，并储存为数据框wine。（1分）\n",
    "# 2查询据框wine中是否存在缺失值。若存在缺失值，则需对其进行处理；反之，则无需处理。（2分）\n",
    "# 3在wine数据集中，“Class”列为葡萄酒的类别，分别为1、2和3。绘制各类别的数量占比饼图。（4分）\n",
    "# 4将数据框wine的数据和标签进行拆分，分别储存至数据框wine_data和数据框wine_label。（3分）\n",
    "# 5将数据划分为训练集和测试集，训练集和测试集样本数比例为8:2，\n",
    "#     并将训练集数据、测试集数据、训练集标签和测试集标签分别储存至数据框wine_train、\n",
    "#     数据框wine_test、数据框wine_train_label和数据框wine_test_label。（3分）\n",
    "# 6构建聚类数目为3的K-Means模型，并命名为kmeans。（4分）\n",
    "# 7对比真实标签和聚类标签，求取FMI（FMI为聚类模型的评价指标），并输出其结果。（4分）\n",
    "# 8当聚类数目为2~10类时，确定最优聚类数目。（4分）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "65ed0d7c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Class                           0\n",
      "Alcohol                         0\n",
      "Malic_acid                      0\n",
      "Ash                             0\n",
      "Alcalinity_of_ash               0\n",
      "Magnesium                       0\n",
      "Total_phenols                   0\n",
      "Flavanoids                      0\n",
      "Nonflavanoid_phenols            0\n",
      "Proanthocyanins                 0\n",
      "Color_intensity                 0\n",
      "Hue                             0\n",
      "OD280/OD315_of_diluted_wines    0\n",
      "Proline                         0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd \n",
    "wine = pd.read_csv('wine.csv', encoding='gb18030') \n",
    "print(wine.isnull().sum())  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "47b69797",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAOcAAAD3CAYAAADmIkO7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAhkElEQVR4nO3dd3gc1dn38e+9VXJbN4wrlu3YNBeqAxhshx5EeCAQCA8QkVATSCihKJRk81IiSHkgIZSQBAgQekeUEIpNtemswdjGtgzuTVpbXbt73j9mBLItWZK9u2d29/5cly5ry8y5V97fntk5M3PEGINSynt8tgtQSrVPw6mUR2k4lfIoDadSHqXhVMqjNJw5TkTEdg0qMzScWyEiIRH5n84CICL9RWRQF9d5tIhMEpGAiAQ3X7c4giLia3Pf7iLyY/f3H4jIne7v3wNu7eZrGicis7by+HsiMr2b62wWkaLuLKM6F7BdgMd9D/iJMeapTp53NrAjcJGIBACMMQn4umcLGWOa3OcOBO4C7ge+AyRFZFfgU2B3YC7gB84F3naXqQeuE5EvgBagSUR6ADcCF7ctRESuB34MxN27xgARY0y9e7vZXUdHhrOV94X7wfCWMeauNnd3uE4R+cBdX/NW2hxhjNlxK48XJA1nB9yQ/RYYISLvuXf7gd2A/Y0xH7jPKwJOBL4jIr2B44GfikhPYAfgC0CAAwCMMXeLyH+MMcvbtPW5MWYf99+9NqsjDCwBfgFMAT53H5oMvAK8ICLhNuFvAa42xvzdXb4KaBaRU90abuzCy/+niNS3uS3AP40xN9B+EBPGmKTb3mhgb2PMI23q+b4xpkpEDgCuBo4y7tEv7t+5qgs1FRwNZ8duBP4DTAQuBz7C6e3ubg2m61LgJqAO+AQ40g3gocCpxpjTW58oIhPc9f7cvb0Ap4cb6X4AtP7bZIyZ4i72FNAP503eB5gArMAJah3wBhAWkUONMTU4QRoqIuPd5YPufY1svfdq6yfGmP+2qduH88EE0O4hZe6H0S+A84C/tHmoxX08AtwJNADvisguOH/XO4BEF+sqKBrOdojIbsAonB5xKPAEsBR4zxhzc5vnTQMuwtlMPR54wRjz5VZWvQinF3xTREYB4vaYn7XpOfdxN18BMMYc6ba1M3A3cDuQBFYCBwEXGmPmtmljHnAccDLOJvLbbOe+BWNMCki1uevPInID0As4373vE+ABYKIxZv1mq/ADzwDrgdeBd4Azgdu2t7Z8puFshzHmM+A4ERkCnAQ04bwRR4nIkTjfuTbgvPnPxOkdprvP3dp664BzReQGY0y9iCRFZCywxn1KkbsZm2xdRkS+DfwEGIvz3XZ34BBjzLXujpunRORvxpg/uG3cD9zvbpZOMsY0u+vZ3j9LW78wxtzXpsa/4ISyroPnJ3G2Ftbi9PQXAqXGmFTbHV9qUxrOzbg91KnAgTg7Yu4DphpjWtygnAxUuJtx+wPPA5U4m7CNIiKt36e2YiSw2P39apweBJze5U023XTsATxqjHnJra8vzgcFxpjXRGQyMMR9bBeczcQWnN7qOTeUM4HPOni9pwJ/A1p7/BrgljZh/hZO8NpdvtVWggkwAoi6v//bfY3niMjf+KbnVZvRcG5pLc4m2oPAe8Aw4PI2b9adcAL6Bk6P+howGLhdRIqBE/jmjb4FN/xPuz1mD5zNzmIROR143/0pb7PIX4A6EfmNe7sEGCQib7SuEuglIhfjvOl/DAwC/g6cDuyLs8n9ufvczTUCs40x0zuodyXOh1R7j/mBvh291ja+An4ELAdOA67H+UA4x21ftUPDuRljzDrgEREZDMwzxuzR9nERuQ9obu0pROTPOL3SPLd37exv+jvg98aYVSJicN64m/s6RMaY8V/fKTIQJ4D3AcuNMVe1s+wiETkYZ9N7qTtM04Cz06XdkG0LETmm9bW089iJwLPu8I24r2OZiFyO80HxGDDDGBN3/156IEU7NJwdS27lsdZhAAE+xhnWuEBEpgCXiMjzbZ8sIkGcHSrH4+zE+ZH7UD/g2nbW7297w+2hjgIqgEtwNqP/KCIvAr81xrzV5rnjgF8DR7t39QLqjTGPA4+LyBi23OM6WUQ+p30DN7s9DvhfnK2KM40xb4vIH0VksDFmpbv++3B2pNXj7C0GwBhzg4jcBZwC7Iwztgv6PmyX/lE6FgB2FpGPNrt/J+Ae9/f+wHPAkzh7Ks/D6RWuxd276Q6NhHB2iLyI84audZdfv3nP7C6zxP3Xh7MneBrOXs7jjDHz3addJCKH4+w57Yczhrkr8DDOHtxPRCQKnIEzZIGIHAtcBixo01yYzjdre7a560HgT8aYF9rcdwfwiltvCmecda37WADnu+8Wwzgi8us2z1GbEb0SQvvcN1o/dzPXZh27AVVtjvBp7zkjjTFL3J1Uk1p7UneTtt4Y0xr20TjjpC9tbX3pJCIDgJrWgxRU12k4lfIoHWNSyqM0nEp5lIZTKY/ScCrlURpOpTxKw6mUR2k4lfIoDadSHqXhVMqjNJxKeZSGUymP0nAq5VEaTqU8SsOplEdpOJXyKA2nUh6l4VTKo/TaLSot3OkWHsS5OFkdcFLrBa3VttGeU6XLKTgX/jocZ6qIIy3Xk/O051RpYYxpO0/oDsBqW7XkC+05VVqJyP44Vy18x3YtuU6vvqfSRkT640ybeHzr5TjVttOeU6WFiISAR4BfaTDTQ8Op0uUMYC/gShF5TUS2Oh2i6pxu1npYSXml4EyftzPOtIEDgQHuT/82v4dx5j9p+9M64W0NzvyfbX9W4+xRnVdVUbo0ay9IdYuG0yNKyitLgP1w5jvZ2f0ZhzNNYCZtwJkecC7ObGmfAR9WVZQuy3C7qhMaTgtKyit9wCRgCs4kvVOA4VaL2tJiYAbOxLszqypKF1qup+BoOLOkpLxyCPA992cq0MduRd22FCeozwGVVRWlNXbLyX8azgwqKa8cCfwAZ7bryeTPJLEtOL3qI8BjVRWlVmdiy1cazjQrKa/siXMo2xk4gcx3CeC/OBPmPlJVUarH06aJhjNNSsorJwA/xQlmrm2ypstq4O/A7VUVpV/ZLibXaTi3Q0l5ZQg4ETgXZ6eOciSBZ4C/Ai9XVZTqm2wbaDi3gRvKM4Bf4YxDqo7NBa4BHtSQdo+GsxtKyiuDwE+AK4CdLJeTa+YAv6mqKH3cdiG5QsPZBW4oTweuxDlSR22794FfV1WUPme7EK/TcHaipLzyOzjfnXa1XUueeRP4eVVF6Ye2C/EqDWcH3IMG/gicbLuWPJYEbgGurqoo3Wi7GK/RcG6mpLwyAPwc+C3Q23I5hWIZcGFVRemjtgvxEg1nGyXllXsBdwETbddSoJ4DzquqKK2yXYgXaDj5+tSsXwLXASHL5RS6jcBPqypK77ddiG0FH073u+W/gENt16I2cQ9OL1pnuxBbCjqcJeWVxwD/wDmJWXnPfOCHhbpHtyDD6e70+RPOjh/lbc3AZVUVpTfbLiTbCi6cJeWVEZxTnQ6zXYvqln8BZxXSWS8FFc6S8soxOAdk6wEFuWkGcFxVRWm17UKyoWCuvldSXjkVmIUGM5dNA95xP2TzXkGEs6S88kfASzhXqlO5bRxOQPP+FL28D2dJeeV5wN3o+GU+GQi8XFJe+T+2C8mkvA5nSXnlL3CO3cyXa/eob4SBR9zhsLyUt+EsKa+8CCi43e8FJogT0O/ZLiQT8jKcJeWVl+KMY6r8FwIeLSmvPNp2IemWd+EsKa+8BLjRdh0qq0LAY/kW0Lwa5ywpr/xfnEs06nfMwtQEHFZVUfq67ULSIW/C6Y5jvoTulS1064D9qipKv7BdyPbKi3CWlFfuDLyFM/OWUvNxAprTRxLl/HfOkvLKHXBO0tVgqlbjgMfdC7PlrJwOZ0l5ZRHwNDDadi3Kc6YDf7NdxPbI6XDijGPuZ7sIr0o2bKRh8Yck6+OZbaeuGpNMZLSNbXR6SXnlxbaL2FY5+52zpLzyFJw9s6odycZa1jwSpXjMvtTNncmAoy4g/sa/STXXExoyjv4Hn9nhcmuf+T2p+jihHb/FgCPPp6VmJdUv3b7Jshvef4a6Oa+w48m/o37+m/Qaf0iWX2GXtQAHVVWUzrJdSHflZM/Z/Jv+Y3eVJeW26/CyltWL6XfwmUQOOIniUXux+qGriRzwQwafciPJjeto/PKTdperm/MKPXebzpCym0g1N9C0YgE1r929xbItqxfTa9LhNK+cjwSLsvzquiUIPFRSXtnXdiHdlXvhjEaCIUk+8FzoV6PO9z/xhu1yvKpopwmEh+1C41dzaFoxH19RL0KDnTOt/D0ipJrq213OV9yblrVLSDXWkty4hkCfgbRUL9tiWWMMJpmgYfGHFI/eO2uvaxuNBO60XUR35V44IQrsLULPS4KPHPhi6LI3e9JQa7soLzLGUDf3dXxFveg54VBq3nyA+i9m0bD4fYpGTmp3maLhu5OIr2HD+88QHDACX1Fveuw8ZYtli0ftScPCdwn0Hsiax66hcUn7PbGHnFBSXllmu4juyK3vnNHIgThnw2/yodJsAotPar66+UMzdmc7hXlbzcx7Ce4wEn/vAWyY9TihIWPpe8AP233u2uduov8hZ+ML92DD7CeQUDG99ziSxqWfbrFs07LPScRXkqyrIVGzgv6H/TSbL2tbbAQmVVWULrZdSFfkTs8ZjfQB7qWdmkOSGPV46DclVwfunZH9wrwp/s6j1M55GYBUUx2+cE9Cg0aT2LCGPvse2+FyqcZamtdUYVJJmlbM//r+9pZtqV5OoO8QxB8kRz7kewO32S6iq3InnHAtUNLRgyKEzwg8P21m6IJ3ItTWZK0qj+q1x5HUzXmVlfdfjjEpikbtxYZZj9Nn32PxuTtwmtd+SfXMezdZLrLfD1j/wi18ddNJpBo20nO3aQBbLJtqqsffsx/BASPY+PELFI/cI6uvbzscUVJe+QPbRXRFbmzWRiNjgU9x9rx1KmF8y37cctn611MTJ2S2MJWjlgG7en3ypFzpOSvoYjABApIa9q9gxa5/DN72GuTCp4/KsmE4E1V5mvd7zmhkCrDNQyZrTOT90qbrd1pNvx3SWJXKfQlg76qKUs/uZs6FnvMP27PwDhLf++3w+Rzlm/VBugpSeSEA3Gq7iK3xds8ZjZwIPJSOVRlD6uXUnjPPabn4wCT+QDrWqfLCMVUVpc/YLqI93g1nNBIC5pLmM07ipkfs6Obr+39lBg1L53pVzvoQZ/PWc0Hw8mbteWTgVLCI1E+YEbqw58n+l3PuQGiVEXsCx9ouoj3e7Dmjkb7AQjJ8AvXs1M4zTm2+Yr9mguFMtqM87xNgD6/1nl7tOa8iC1c2mOybN+2j8NmLx8rSqky3pTxtInCC7SI2572eMxqJAMuBHtlq0hhqb0j88OPbk8fk/fwbqkOfAhO81Ht6sec8hSwGE0CEXuXBB6c8Hyp/oweNBTvNeYHbHTjcdhFteTGcZ9lqeFfflwd+GD571ST5Yn7nz1Z5yFOn1XhrszYa2Qd413YZxtB4Z7J09vWJU6barkVlVRIYVVVR+pXtQsB7Pae1XrMtEYrODlROnRG68J0+1Gb26ljKS/zAObaLaOWdnjMa6QmswDnnzjNajH/p6S2X17yZGj/edi0qK1YBI6oqSltsF+KlnvMkPBZMgKAkh98XvH6XGwN3zNAzXArCjsD3bRcB3gqnJzZp2yNC4MTAjGmzw+e9P5CaNbbrURnniWsNeWOzNhoZD8Rsl9EVSSOrftZywfIXU5P3tF2LyphmYFBVRanV/Q1e6Tk922tuzi9mx9uDN026I/in13ykkrbrURkRAqzPlm0/nNFIADjVdhndIYLvCP970z8In/PpMNassF2PyojjbRdgP5ywNzk6Q1hfqZv4eviCohP9r862XYtKuyNKyit72izAC+GcbruA7eET+t0YvHPyg6FrZgRJNNuuR6VNMXCUzQI0nGmyn2/utI/CZy0cI8uW2K5Fpc2xNhu3G07n+2benAnSU5p2/W/o0v5n+59903YtKi2m22zcds+5Nx488GB7iND7iuC/pzwbuuL1YjqYLUjliqEl5ZVjbDVuO5zTLbefMeN9VQd9FD57xURZuMB2LWq7WDv5QcOZQWFpGfNU6OoRlwcemGm7FrXNDrLVsL1w5tn3zY6IUPTTwDNTXw1d9HZv6vQMl9xTkD1n3n3f3JpRvlX7fxA+d8N+vk8/tV2L6pYxJeWVQ2w0bDOc0y22bUVQkiMeCF437neBO/UMl9yyr41GbYYz7zdp2yNC8OTAq9PeCZ///gDia23Xo7pkVxuN2gznLhbbtm6wVO8zO/yzxOG+dz+0XYvqVAGFMxrxs5WJcAuFX8zgO4L/N+nW4E16hou3WelIbPWcI+jGfJv5TATfUf7Z098PnztnKGv1DBdvKqhwWjvqwqv6Se2kN8IXhI73zbR+9UG1hYiNPbYaTg/xiRnwh+Dt+9wfvG5GkIT1C0ypTWS997QVzp0stet5IsgU/6fTPgyfvWC0LNczXLxjeLYbtBVOK4O6uaSXNO72cuiSfmf4n3vLdi0KgIHZbtBWOAdbajeniNDn6uB9BzwduvL1IpoabNdT4DScaksTfYsP+ih89rLdZfEXtmspYDtku0ENZ44okpZvPRu6ctgvAw+/bruWAlUwPecgS+3mNBGKfx548qCXQ798qxf1G2zXU2AKIJzRiACBrLebR8b4VhzwYfic6sky9zPbtRSQAdlu0EI44wZnqjW1HYKSHPlQ6Jqx1wb+oWe4ZEc42w3a2qxNWGo3r4gQPDXw8rS3wz9/rx8b1tuuJ8/5s92ghjMPDJH1+74X/lnTwb4PPrZdSx7LejhtfffTcKaZX1JDflN8U8vsEUO1B80IXxxKs9qihjOPLAgH14vooZGZkcr69Z90szaPLAgFa23XkMeyfiKChjOPLAiFdC945mT9PavhzCNVwYCOH2dO1iep0nDmkdV+fy/bNeSx1dluUMOZR2p9vqwfYlZAlmW7QVvh1N39aZaARFJPKMikggmnTu6TZssDgVWIZH2gvIAUTDjnW2o3by0MBfUC1Zm1NNsNas+ZJ+bpGGemac+pts0XwaBerS+zCiac2nOmWVUwqGOcmdMMrMl2o5amY4jXAcuttJ2nVgV0jDODVsTKYlk/Z9bmREbae6bRRh3jzKRPbDRqM5z6vTNNkpDUMc6MmmWj0bztOdc3GF5amGBtfard2+myqjZFS9LuVUJWBPwrEdHvnJlTcOHMWM9Z3WA4+t/1zF6W5Dv31LOmLrXF7Y6sqk1x0F11X99OpAw7/d9Gpt9dx/S764itSnLL7Gb2vbOWumbDiwsTBP2SqZfSJQuDwXVWC8hvBrAyuZTNT9uPMrXiT1Yl+dMRYfYbHqC60XDruy2b3P5gRYojvrXl51J1g6HsyQbqmk2bdaU4eXyQGw4r+vq+m2c1c+aeId5dnqRn0G4wAeaHQhtt15DHPo+VxbJ+ojXY7Dmj8SVkqPecVhJgv+EBZi5JMHtZkov2D21ye/8R7R/l5vfBQyf0oE/4m8C9szTJswsSTL6zljOeaiCRMhgDLSn4z8IE3x1rf2tyfiioJxJkjpVNWrC7WQvwQqZWbIzhoTkt9CsWgr4tb7enT1iIFG3aE+471M9/T+vB7LN60ZKC5xYkOHxMgGfntzC8j49jHqjn1cV2s1EVDOoxtZlTsOF8PlMrFhH+WlrMxEE+np6X2OJ2V03c0ceQ3s6faZ+hPhasS3HS+CDR6WH6FgmlYwM8NtfuwTmrAv6eVgvIbwUbzhlAY7pXesMbTfzrY+fE9ZpGQ1VNapPbfYu6/j3xtCca+HhlkmTK8OTnCSYNdjqpBetSjOknhANCyvIlnTf6fFm/GnmBqAdithq3G85ovAF4Ld2rPXvvEPd+0sLUu+pImi1vHz7Gz2drklz1SuefC7+eFua0JxrY44469h/u59DRATY0GQb38rHbDn7+9n4zh462970zBamEzneaKS/FymLWvrOIsX0l/2jkAuAmu0XkrmUB/4ojRwzTcGbGabGy2H22Gre9WQsZ3ClUCBYGg1k/ILtANANP2yzAfjij8XnAYttl5KoFOsaZKf+JlcWsTrNoP5wO7T23kY5xZsyjtgvQcOa4qmBAxzjTrwV4ynYRXgnnS4CVQ6Ry3cpAQMc40+/lWFmsxnYR3ginM6Ryr+0yctEGHePMBOubtOCVcDrusF1ArjFgEnoeZ7o1A0/aLgK8FM5ofA7wpu0ycslKv38VIiHbdeSZh2NlMU+cguedcDq09+yGhSEd48yAm20X0Cor4RSRHUXk9S489REsXOUsV80PBXWMM73eiZXF3rNdRKuMh1NE+gH3AJ3vVYzGG4FbM11TvlgQCmV9WrpMSsQTmITVw0n/bLPxzWXjiO0kcBJdHze6BbgMKM5YRXkiG2OcyfokX932FSZl8IV9jPjZCHwBH8v/tZxeE3rRZ88+7S7XvKaZ5fcuJ9WYonhUMUNOHsK6V9YRn+WMmKXqUxSPKaZoeBE1b9YwqnwUG+dspN+Ufpl+SR2pwtly84yM95zGmA3GmK6PYUbja4G7M1ZQHlnhz/wYZ83bNQw8YiCjLh1FMBKkNlZL3bw6EvFEh8EEWPnwSgYdM4jRV4ympbqF2rm1DDh4AKN/NZrRvxpNj3E96D+tP41fNtJvaj/qF9XjC1ndBfIHm2egtMdrO4Ra/QlI72Xy8tAGv69/ptsYcMgAeo13rled2JjA38PPsruWERwYZMMHHR962ryymaIS57pLgd4BUg3f/He2VLeQ2JCgeFQxxhhM0lD7aS29J/bO7Ivp2Grgn7Ya74g3wxmNf4FHBoK9yoBpyeJ5nPVf1JOsS9K0somiYUUM/O5AGhY1sO6l9kcd+uzbhzVPrmHDhxuonVNLz92+6eTXvbyO/gc7nyu9x/dm40cbCfYLsuTmJdTOtTIf002xsliDjYa3xpvhdFxOBq6SkC9W+/2rEQlno61EbYLl9y1n2BnDnM3Qaf0I9g0SOSBC3ed17S4z6JhB9JrYi+qZ1fSd0hd/kfP12KQMdXPr6LmLE9bItyMMOnYQ/h5+ek/qzYb3sn4iyArgL9lutCuyFk5jzPRuLRCNVwE3ZKKWfLAoS+dxphIpvvrrVww+YTChgSFCg0I0r3F2EjcsbiA4INjhskU7FdGyroWBR3wzU0T9/Hp6jO6ByDeXimla1URoUAgJiHOV2Oy6PFYW8+T0iV7uOQEq0HM92zU/FMxKF1M9s5qGJQ2sfmY1i363CH9PP3Vz61h0/SLWv7Kegd8dSOOyRlY9tmqLZdc+v5YBRwzAF/7mbbZxzkZ67Nzj69vJhiTBSJDw0DDVr1VvsvmbBW8B1q500Bn7lynpTDRyLPCE7TK85sqB/Wc83bvXNNt15LAUMDlWFnvfdiEd8XrPCdH4k+j5nltYFOzo6ruqi/7p5WBCLoTT8QucswWUa2UgoAdpbLsa4ArbRXQmN8IZjS/AGftUrrhfz+PcDtFYWczzx3DnRjgd1wJLbRfhBQbI5hhnnvkU+KvtIroid8LpTFV/ie0yvGCN378GkaLOn6k20wyUee0wvY7kTjgBovGHgGdtl2HbomBgte0actTlXt8J1FZuhdNxGrDQdhE2LQiF9GJo3fdMrCx2k+0iuiP3whmN1wDH4UwyU5Dmh4J2pzXLPUuBH9suortyL5wA0XgMOMt2GbboGGe3JIH/9cp1gbojd/+To/F/47Ez17NlRcCvY5xd9/9iZbGuXCLHc3I3nI5LgJz8w2+PuM9v7XIBOeZVnCG4nJTb4YzGW4ATcU77KRjNomOcXfAlcEqsLJazJ+3ndjgBovGVwAk481vkvbV+31pEenT+zIK2Fjg8VhbL6Q/t3A8nQDT+FnCx7TKyYVEwqGOcW1cLHBUri82zXcj2yo9wAkTjt1AAM2TPDwV1jLNjzcBxsbLYu7YLSYf8CSdANH4R8HvbZWTSglCoyXYNHpUCTo2Vxf5ru5B0ya9wAkTjlwHX2S4jU3SMs0Pnx8pinrru7PbKz//oaPwqIGq7jExYrmOc7YnGymK32S4i3fIznADR+G+BK22XkW41Pn/Gr1WbY66LlcV+a7uITMjfcAJE49cDl9ouI52aRefjdBngglhZ7CrbhWRKfocTIBr/A3Ch7TLSYb3Ptw4RnWbeGdM+LVYWy+vDN/M/nADR+M3Aedi4Kmoa6RgnABuA78XKYvfbLiTTCiOcANH4rTiznXnyAsJdsUDHOJcAU2JlsRdtF5INhRNOgGj8EeDbwHzbpWyLeYU9xjkb+HasLDansyeKSH8ROUxEBnb2XC8rrHACROOfAfsCT1qupNsWhQLS+bPy0t+B6bGy2JaXld+MO1nzs8Bk4FUR2SHTxWVK4YUTIBrfAHwf59qlOXGxJ4DlgUChXdRrHc7heGd1YxawicDFxpjrgBeBvTJWXYZ5fzqGTItG9gfuB0bZLqUz+4wcvqDJ5xtru44seQnnSnnbdGaJiEzFOZfzaGNM1qcuS4fC7DnbisbfBvbACainNYkUwhhnE84ZRkdsRzAFZ+dfNTl8KqH2nG1FI6fiXHC44/nULan2+aqnjhye71dA+BTnej+fpGNlInINMMcY81A61pdt2nO2FY3fB+wM/AOPTXu/OBhYabuGDErgnO63z/YGU0QuF5EfuTf74syLkpO05+xINDIJZ36Wg22XAvBQ716zrh3Y/9u268iAZ4FLY2Wxz9OxMndv7cNAGJgDnGdy9E2u4exMNHIMzjmi42yWcc2AfjMe7tM7n+bj/Bj4Zaws9rLtQrxKN2s7E40/DYzHOT632lYZC4PBfBnjXAGcCeylwdw67Tm7IxrpD/wa+BkQzGbTh40YOntlIDA5m22mWT3wR+CGWFmsznYxuUDDuS2ikXHAVcAPgKwcGLDvyOHzG30+q5vW22g1cAdwa6wsls87tdJOw7k9nJ60DDgHZy9vxkwoGRFHJJLJNtLsPeAvwIOxspjOSr4NNJzpEo1MB87FmWQplM5Vx32++IEjh+dCMBPAY8CfY2Wxt2wXk+s0nOkWjeyAM6PV2cCYdKzyo3Bo3mlDB2e0Z95OS4F7gNtiZbFltovJFxrOTIlGBDgUZza0w4Ft7vke7t1r1jXeG+NcgtNLPgq8EyuL6RspzQK2C8hb0bjBOXj7JaIRP7A3cAjOQQ1TgC5fRW9BKNiYkRq7J4XzPfI5oDJWFnvPcj15T3tOG6KRMLA/TlAPwTn3sMMPyp8MHjTz3eKiqVmqrlUj8AnwAfAW8EKsLLYmyzUUNA2nF0QjvYCpwDRgd2AszilsQYAjhg+dvTyY0THOOuAjnCC2/nwWK4vlzLmu+UjD6VXRSAAoAcaVDh8y9MtgcAwwHBjm/vTECW+gnX9bJXFm3FqFM964qs1P6+0qYF4uT5WXrzSceWjCPRMCOCFt1tDlLg2nUh6lB74r5VEaTqU8SsOplEdpOJXyKA2nUh6l4VTKozScSnmUhlMpj9JwKuVRGk6lPErDqZRHaTiV8igNp1IepeFUyqM0nEp5lIZTKY/ScCrlURpOpTxKw6mUR2k4lfIoDadSHqXhVMqjNJxKeZSGUymP0nAq5VEaTqU8SsOplEf9f9NQpw7CPjJ3AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "wine_class = wine['Class'].value_counts() # 各类别数量 \n",
    "import matplotlib.pyplot as plt \n",
    "plt.rcParams['font.sans-serif'] = 'SimHei' # 正常显示中文 \n",
    "plt.rcParams['axes.unicode_minus'] = False # 正常显示符号 \n",
    "plt.pie(wine_class, labels=wine_class.index, autopct='%.2f%%') # 小数点个数 \n",
    "plt.title('各红酒类别占比图') \n",
    "plt.show() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "111fb347",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 4\n",
    "wine_data = wine.iloc[: , 1: 14] # 数据 \n",
    "wine_label = wine.iloc[: , 0] # 标签 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "e105975f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 5\n",
    "from sklearn.model_selection import train_test_split \n",
    "wine_train, wine_test, wine_train_label, wine_test_label = train_test_split(wine_data, wine_label, test_size=0.2, random_state=42) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "b8588c7e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ⑥ \n",
    "from sklearn.cluster import KMeans # 导入分类器库 \n",
    "kmeans = KMeans(n_clusters = 3, random_state=123).fit(wine_train) # 构建并训练模型 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "abe4733b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "wine数据集的类中心为3时，其FMI的评价分值为：0.561796\n"
     ]
    }
   ],
   "source": [
    "# ⑦ \n",
    "from sklearn.metrics import fowlkes_mallows_score \n",
    "score = fowlkes_mallows_score(wine_train_label.tolist(), kmeans.labels_) \n",
    "print('wine数据集的类中心为3时，其FMI的评价分值为：%f'%score) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "7b87aa20",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "wine数据聚10类FMI评价分值为：0.345342\n"
     ]
    }
   ],
   "source": [
    "# ⑧ \n",
    "for i in range(2, 11): \n",
    "    kmeans = KMeans(n_clusters=i, random_state=123).fit(wine_train) \n",
    "score = fowlkes_mallows_score(wine_train_label, kmeans.labels_) \n",
    "print('wine数据聚%d类FMI评价分值为：%f' %(i, score))      "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ecc00786",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1、(15.0分)国际象棋棋盘是个正方形，由横纵向各8格、颜色一深一浅交错排列的64个小方格组成，\n",
    "# 深色格为黑格，浅色格为白格，棋子就在这些格子中移动。\n",
    "# 接下来创建一个与国际象棋棋盘相似的8×8矩阵。\n",
    "# 1启动Jupyter Notebook创建一个Notebook。（2分）\n",
    "# 2创建一个8×8的全0数组，并储存至数组arr。（3分）\n",
    "# 3将数组arr的奇数行奇数列和偶数行偶数列的元素设置为1。（4分）\n",
    "# 4将数组arr转换为矩阵matr1。（2分）\n",
    "# 5将矩阵matr1转置为矩阵matr2，并判断矩阵mate1与矩阵matr2是否完全相同。（3分）\n",
    "# 6将矩阵matr2存储为当前工作路径下的一个二进制格式的文件matr2.npy。（2分）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "a30a8755",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "True\n"
     ]
    }
   ],
   "source": [
    "# ① \n",
    "import numpy as np \n",
    "# ② \n",
    "arr = np.zeros((8, 8))        # 创建一个8*8数组 \n",
    "# ③ \n",
    "for i in range(8): # 按行循环 \n",
    "    for j in range(8): # 按列循环 \n",
    "        if(i + j) % 2 == 0: # 找出行索引与列索引之和为偶数的元素 \n",
    "            arr[i][j] = 1 # 将其设置为1 \n",
    "# ④ \n",
    "matr1 = np.matrix(arr) \n",
    "# ⑤ \n",
    "matr2 = matr1.T \n",
    "print((matr1 == matr2).all()) \n",
    "# ⑥ \n",
    "np.save('matr2.npy', matr2) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "72431d7e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2、(20.0分)招聘数据探索与分析。\n",
    "# 1读取数据文件job_info.csv，并储存为数据框job_info。（2分）\n",
    "# 2将数据框job_info的列名称从左至右依次修改为“公司”“岗位”“工作地点”“工资”“发布日期”。（2分）\n",
    "# 3统计数据中需求最多的岗位招聘，并输出其结果。（2分）\n",
    "# 4获取数据中9月3日发布的招聘信息，并输出其结果。（3分）\n",
    "# 5获取工作地点在深圳的数据分析师招聘信息，并输出其结果。（3分）\n",
    "# 6获取在“工资”列中，以“千/月”或“千/年”或“万/月”或“万/年”结尾的\n",
    "# 数据所在的行的数据，并储存至数据框job_info_new。（3分）\n",
    "# 7根据“工资”列，在数据框job_info_new中，新增最低工资和最高工资两列，\n",
    "# 列名分别设置为“最低工资（元/月）”和“最高工资（元/月）”。\n",
    "# （注意：这两列数据的单位是“元/月”，\n",
    "# 例如：若2-2.5万/月，则最低工资为20000，最高工资为25000。）（5分）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "9242481c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "数据分析师\n",
      "                            公司       岗位    工作地点          工资   发布日期\n",
      "0                         字节跳动   数据产品经理      北京    2-3.5万/月  09-03\n",
      "1              甲骨文（中国）软件系统有限公司   数据产品经理      长沙         NaN  09-03\n",
      "2     莱茵技术(上海)有限公司 TUV Rhei...   数据产品经理  上海-静安区         NaN  09-03\n",
      "3          百度在线网络技术（北京）有限公司...   数据产品经理      北京      2-4万/月  09-03\n",
      "4                     携程旅行网业务区   数据产品经理  上海-长宁区    1.5-2万/月  09-03\n",
      "...                        ...      ...     ...         ...    ...\n",
      "5348            无锡明恒混合动力技术有限公司  仓库数据分析员  无锡-惠山区  0.9-1.6万/月  09-03\n",
      "5349              四川良柏财税咨询有限公司  仓库数据分析员  成都-青羊区    4.5-6千/月  09-03\n",
      "5350       陕西斯达防爆安全科技股份有限公司...  仓库数据分析员      西安  0.8-1.5万/月  09-03\n",
      "5351                珠海文景科技有限公司  仓库数据分析员    异地招聘    1-1.5万/月  09-03\n",
      "5352              广州友财信息科技有限公司  仓库数据分析员  广州-天河区  1.3-2.5万/月  09-03\n",
      "\n",
      "[5315 rows x 5 columns]\n"
     ]
    }
   ],
   "source": [
    "# ① \n",
    "import pandas as pd \n",
    "import re \n",
    "job_info = pd.read_csv('job_info.csv', encoding='GBK', header=None)\n",
    "job_info.head() \n",
    "# ② \n",
    "job_info.columns = ['公司', '岗位', '工作地点', '工资', '发布日期'] \n",
    "job_info.head() \n",
    "# ③ \n",
    "print(job_info['岗位'].value_counts().index[0]) \n",
    "# ④ \n",
    "print(job_info[job_info['发布日期'] == '09-03']) \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "8e42e856",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>公司</th>\n",
       "      <th>岗位</th>\n",
       "      <th>工作地点</th>\n",
       "      <th>工资</th>\n",
       "      <th>发布日期</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1001</th>\n",
       "      <td>深圳市众联软件科技有限公司</td>\n",
       "      <td>数据分析师</td>\n",
       "      <td>深圳-龙岗区</td>\n",
       "      <td>6-8千/月</td>\n",
       "      <td>09-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1002</th>\n",
       "      <td>深圳市田田家园家具有限公司</td>\n",
       "      <td>数据分析师</td>\n",
       "      <td>深圳-宝安区</td>\n",
       "      <td>3-4.5千/月</td>\n",
       "      <td>09-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1012</th>\n",
       "      <td>深圳爱嘻科技有限公司</td>\n",
       "      <td>数据分析师</td>\n",
       "      <td>深圳-龙岗区</td>\n",
       "      <td>0.8-1.5万/月</td>\n",
       "      <td>09-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1016</th>\n",
       "      <td>瑞庭网络技术（上海）有限公司深圳...</td>\n",
       "      <td>数据分析师</td>\n",
       "      <td>深圳</td>\n",
       "      <td>7-9千/月</td>\n",
       "      <td>09-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1023</th>\n",
       "      <td>深圳市成达网络科技有限公司</td>\n",
       "      <td>数据分析师</td>\n",
       "      <td>深圳-南山区</td>\n",
       "      <td>1-1.5万/月</td>\n",
       "      <td>09-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4940</th>\n",
       "      <td>深圳市鑫乔泰数码有限公司</td>\n",
       "      <td>数据分析师</td>\n",
       "      <td>深圳-龙岗区</td>\n",
       "      <td>6-8千/月</td>\n",
       "      <td>09-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4964</th>\n",
       "      <td>广东中成实业控股（集团）有限公司...</td>\n",
       "      <td>数据分析师</td>\n",
       "      <td>深圳-南山区</td>\n",
       "      <td>0.5-1万/月</td>\n",
       "      <td>09-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4966</th>\n",
       "      <td>深圳数字动能信息技术有限公司</td>\n",
       "      <td>数据分析师</td>\n",
       "      <td>深圳-南山区</td>\n",
       "      <td>1-1.5万/月</td>\n",
       "      <td>09-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4970</th>\n",
       "      <td>冠华基业（深圳）投资咨询有限公司...</td>\n",
       "      <td>数据分析师</td>\n",
       "      <td>深圳-龙岗区</td>\n",
       "      <td>6-8千/月</td>\n",
       "      <td>09-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4994</th>\n",
       "      <td>深圳市信威电子有限公司</td>\n",
       "      <td>数据分析师</td>\n",
       "      <td>深圳-龙岗区</td>\n",
       "      <td>0.9-1.5万/月</td>\n",
       "      <td>09-03</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>550 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                       公司     岗位    工作地点          工资   发布日期\n",
       "1001        深圳市众联软件科技有限公司  数据分析师  深圳-龙岗区      6-8千/月  09-03\n",
       "1002        深圳市田田家园家具有限公司  数据分析师  深圳-宝安区    3-4.5千/月  09-03\n",
       "1012           深圳爱嘻科技有限公司  数据分析师  深圳-龙岗区  0.8-1.5万/月  09-03\n",
       "1016  瑞庭网络技术（上海）有限公司深圳...  数据分析师      深圳      7-9千/月  09-03\n",
       "1023        深圳市成达网络科技有限公司  数据分析师  深圳-南山区    1-1.5万/月  09-03\n",
       "...                   ...    ...     ...         ...    ...\n",
       "4940         深圳市鑫乔泰数码有限公司  数据分析师  深圳-龙岗区      6-8千/月  09-03\n",
       "4964  广东中成实业控股（集团）有限公司...  数据分析师  深圳-南山区    0.5-1万/月  09-03\n",
       "4966       深圳数字动能信息技术有限公司  数据分析师  深圳-南山区    1-1.5万/月  09-03\n",
       "4970  冠华基业（深圳）投资咨询有限公司...  数据分析师  深圳-龙岗区      6-8千/月  09-03\n",
       "4994          深圳市信威电子有限公司  数据分析师  深圳-龙岗区  0.9-1.5万/月  09-03\n",
       "\n",
       "[550 rows x 5 columns]"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# ⑤ \n",
    "job_info.loc[(job_info['工作地点'].apply(lambda x: '深圳' in x)) & (job_info['岗位'] == '数据分析师'), :] "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "6440f60a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(5334, 5)"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# ⑥ \n",
    "job_info['工资'].str[-1].value_counts() \n",
    "job_info['工资'].str[-3].value_counts() \n",
    "index1 = job_info['工资'].str[-1].apply(lambda x: x in ['月', '年']) \n",
    "index2 = job_info['工资'].str[-3].apply(lambda x: x in ['千', '万']) \n",
    "job_info_new = job_info[index1 & index2] \n",
    "job_info_new.shape "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "622fea22",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ⑦ \n",
    "def get_max_min_value(x): \n",
    "    try: \n",
    "        if x[-3] == '万': \n",
    "            a = [float(i) * 10000 for i in re.findall('\\d+\\.?\\d*', x)] \n",
    "        elif x[-3] == '千': \n",
    "            a = [float(i) * 1000 for i in re.findall('\\d+\\.?\\d*', x)] \n",
    "        if x[-1] == '年': \n",
    "            a = [i / 12 for i in a] \n",
    "    except: \n",
    "        pass \n",
    "    return a \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "1ffc1490",
   "metadata": {},
   "outputs": [],
   "source": [
    "salary = job_info_new['工资'].apply(get_max_min_value) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "c52ae72d",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\86178\\AppData\\Local\\Temp\\ipykernel_19024\\1583918235.py:1: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  job_info_new['最低工资'] = salary.str[0]\n"
     ]
    }
   ],
   "source": [
    "job_info_new['最低工资'] = salary.str[0] "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "24c76418",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\86178\\AppData\\Local\\Temp\\ipykernel_19024\\3671012557.py:1: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  job_info_new['最高工资'] = salary.str[1]\n"
     ]
    }
   ],
   "source": [
    "job_info_new['最高工资'] = salary.str[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "6db6fa63",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3、(25.0分)sklearn库不仅囊括了很多机器学习的算法，而且也自带了许多经典的数据集，\n",
    "# 鸢尾花数据集就是其中之一。鸢尾花数据集包含150个数据样本，分为3类，每类50个数据，\n",
    "# 每个数据包含4个特征，分别为SepalLength（花萼长度）、SepalWidth（花萼宽度）、\n",
    "# PetalLength（花瓣长度）、PetalWidth（花瓣宽度）。通过这4个特征可划分鸢尾花的类别。\n",
    "# 请根据鸢尾花数据，编写Python代码完成下列操作：\n",
    "# 1使用sklearn库加载鸢尾花数据和数据标签，将鸢尾花数据储存至数据框data，\n",
    "# 数据标签储存至数据框label。（3分）\n",
    "# 2统计数据中鸢尾花的类别数，并输出其结果。（3分）\n",
    "# 3以花萼长度为x轴，花萼宽度为y轴，绘制并展示散点图。（3分）\n",
    "# 4将加载好的鸢尾花数据集划分成训练集和测试集两部分，训练集和测试集样本数比例为8:2，\n",
    "#     并将训练集数据、测试集数据、训练集标签和测试集标签分别储存至数据框x_train，\n",
    "#     数据框x_test，数据框y_train和数据框y_test。（4分）\n",
    "# 5对数据框x_train和数据框x_test进行离差标准化，将标准化后的训练集和测试集数据分别储存\n",
    "# 至数据框scaler_x_train和数据框scaler_x_test。\n",
    "# （注意：测试集数据需使用和训练集数据相同的规则进行标准化。）（4分）\n",
    "# 6构建决策树分类模型，命名为model，并进行模型训练。（4分）\n",
    "# 7对构建的模型进行性能评估，并输出其结果。（性能评估包括计算精确率、\n",
    "# 召回率和F1值等分类评估指标。）（4分）\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "608f48f3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3\n"
     ]
    }
   ],
   "source": [
    "# ① \n",
    "from sklearn.datasets import load_iris \n",
    "iris = load_iris() \n",
    "data = iris['data'] \n",
    "label = iris['target'] \n",
    "# ② \n",
    "print(len(set(label)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "1e07539f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXMAAAD4CAYAAAAeugY9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAYF0lEQVR4nO3db4xc1XnH8e+TxVQr8mdtsXGVFQ4KsiKhLI7TlbFFQjfILgERuvKLIgKN2rS4TVD6Im0EKBtVQm6sJpHfRDWKidMXYByRCK1oUomQKA4psglrGVjUJqJUxNQBsalxoiCrodbTFztjD+OZuXd2z5577rm/j7TS7szhzrlnL4/W9/7OOebuiIhIvb2t6g6IiMjKqZiLiGRAxVxEJAMq5iIiGVAxFxHJgIq5iEgGShVzM1tvZsf7vHeRmZ0ws8Otr8mwXRQRkSIXlWz3VWC0z3tXAYfc/a6yH3rppZf65ZdfXra5iIgAx44d+5W7j/d6r7CYm9l1wBvAq32abAVuMrOPAgvAX7n7/w065uWXX878/HzRR4uISAcz+0W/9wbeZjGzi4EvAncPaPY0sN3dtwBrgBv7HGuXmc2b2fzi4mJxr0VEpLSie+Z3A/vc/fSANs+5+yut7+eBjb0auft+d59y96nx8Z7/ShARkWUqKubbgTvN7DDwQTP7Ro82D5jZJjMbAWaAZ8N2UUREigy8Z+7u17a/bxX0vWa2291nO5rdCzwEGPCou/9gNToqIiL9lU2z4O7TrW9nu15/nqVEi4iIVKR0MRdZLXPHT/KVx37OL0+f4T1jo3z++vczs3mi6m6J1IqKuVRq7vhJ7nlkgTNvngXg5Okz3PPIAoAKusgQNJ1fKvWVx35+rpC3nXnzLF957OcV9UiknlTMpVK/PH1mqNdFpDcVc6nUe8Z6rxLR73UR6U3FXCr1+evfz+iakbe8NrpmhM9f//6KeiRST3oAKpVqP+RUmkVkZVTMpXIzmydUvEVWSLdZREQyoGIuIpIBFXMRkQyomIuIZEDFXEQkAyrmIiIZUDEXEcmAirmISAZUzEVEMqAZoLIi2lhCJA0q5rJs2lhCJB26zSLLpo0lRNKhYi7Lpo0lRNKhYi7Lpo0lRNKhYi7Lpo0lRNKhB6CybNpYQiQdKuayItpYQiQNKuYZUwZcpDlUzDOlDLhIs+gBaKaUARdpFhXzTCkDLtIsKuaZUgZcpFlUzDOlDLhIs+gBaKaUARdpFhXzjCkDLtIcKuaSPOXlRYqVumduZuvN7PiA9w+Y2REzmw3XNZHzefmTp8/gnM/Lzx0/WXXXRJJS9gHoV4GeMQgz2wmMuPs24H1mtjFU50SUlxcpp7CYm9l1wBvAq32aTAMPt77/PvDhPsfZZWbzZja/uLi4jK5KEykvL1LOwGJuZhcDXwTuHtDsEqD9b95TwPpejdx9v7tPufvU+Pj4cvoqDaS8vEg5RX+Z3w3sc/fTA9r8lvO3YN5e4pgipSkvL1JOUeHdDtxpZoeBD5rZN3q0Ocb5WyubgJeC9U4ab2bzBHt2TjIxNooBE2Oj7Nk5qTSLSBdz93INlwr6Z4BPuPtsx+vvBH4C/BC4Adjq7r8edKypqSmfn59fbp9FRBrJzI65+1Sv90rnzN19uvXtbNfrvzGzaWAH8OWiQi71MTu3wKGnXuasOyNm3Hr1Zeyemay6WyLSQ5BJQ+7+OucTLZKB2bkFHjx64tzPZ93P/ayCLpIePayUng499fJQr4tItVTMpaezfZ6l9HtdRKqlYi49jZgN9bqIVEvFXHq69erLhnpdRKqlVROlp/ZDTqVZROqhdM48JOXMRUSGNyhnrtssIiIZ0G2Wmrrt/iM8+eKpcz9fc8U6Dt6xrcIeLZ82n5DUhbhGV/s611/mNdRdyAGefPEUt91/pKIeLZ82n5DUhbhGY1znKuY11F3Ii15PmTafkNSFuEZjXOcq5lIpbT4hqQtxjca4zlXMpVLafEJSF+IajXGdq5jX0DVXrBvq9ZRp8wlJXYhrNMZ1rmJeQwfv2HZB4a5rmkWbT0jqQlyjMa5zTRoSEamJIJtTSFpi5F6V/xapDxXzGmpnVttRp3ZmFShdbIuOEeIzRCQe3TOvoRi5V+W/RepFxbyGYuRelf8WqRcV8xqKkXtV/lukXlTMayhG7lX5b5F60QPQGmo/gFxJ0qToGCE+Q0TiUc5cRKQmlDMfQoxsdZnPUMZbmkDXeTgq5h1iZKvLfIYy3tIEus7D0gPQDjGy1WU+QxlvaQJd52GpmHeIka0u8xnKeEsT6DoPS8W8Q4xsdZnPUMZbmkDXeVgq5h1iZKvLfIYy3tIEus7D0gPQDjGy1WU+QxlvaQJd52EpZy4iUhMrzpmb2TrgD4Dj7v6rkJ2T3mbnFjj01MucdWfEjFuvvozdM5Ol34d0MvMisvoK75mb2Vrgu8AW4EdmNt6jzUVmdsLMDre+Ji84kJQ2O7fAg0dPcLb1r6az7jx49ASzcwul3ofzGd6Tp8/gnM/wzh0/GayfMT5DRMop8wD0KuBz7v4PwGPAh/q0OeTu062vhR5tpKRDT7088PWi9yGdzLyIxFFYzN39x+5+1MyuZemv8yM9mm0FbjKzn5rZATO74PaNme0ys3kzm19cXFx5zzN2ts9zjM6/xIv+u1Qy8yISR6loopkZcAvwOvBmjyZPA9vdfQuwBrixu4G773f3KXefGh+/4E6NdBgxG/h60fuQTmZeROIoVcx9yZ3Ac8DNPZo85+6vtL6fBzYG6l8j3Xr1ZQNfL3of0snMi0gcZR6A3mVmn2z9OAac7tHsATPbZGYjwAzwbKgONtHumUlu37rhLX+J3751w7m0StH7sJTh3bNzkomxUQyYGBtlz87J4Jn51f4MESmnMGfeSrM8DPwe8DzwT8Ct7j7b0eYDwEOAAY+6+xcGHVM5cxGR4a0oZ+7urwM7ul6e7WrzPEuJFhERqYCm83cJMQmmzISelR4jxgYXIc4jFSF+r9pURFKmYt4hxGL57Qk9be0JPUDpQlh0jBgbXIQ4j1SE+L1qUxFJnVZN7BBiEkyZCT0rPUaMDS5CnEcqQvxetamIpE7FvEOISTBlJvSs9BgxNrgIcR6pCPF71aYikjoV8w4hJsGUmdCz0mPE2OAixHmkIsTvVZuKSOpUzDuEmARTZkLPSo8RY4OLEOeRihC/V20qIqnTA9AOIRbLbz8cXEkKpOgYMTa4CHEeqQjxe9WmIpI6bU4hIlITK96cQuIryisrz5ymFLL5KfRB4lMxT1BRXll55jSlkM1PoQ9SDT0ATVBRXll55jSlkM1PoQ9SDRXzBBXllZVnTlMK2fwU+iDVUDFPUFFeWXnmNKWQzU+hD1INFfMEFeWVlWdOUwrZ/BT6INXQA9AEFeWVlWdOUwrZ/BT6INVQzlxEpCYakzOPsWZ1rAyvcuTDqct4hVinPoQQ8xhirREv5WRTzGOsWR0rw6sc+XDqMl4h1qkPIcQ8hlhrxEt52TwAjbFmdawMr3Lkw6nLeIVYpz6EEPMYYq0RL+VlU8xjrFkdK8OrHPlw6jJeIdapDyHEPIZYa8RLedkU8xhrVsfK8CpHPpy6jFeIdepDCDGPIdYa8VJeNsU8xprVsTK8ypEPpy7jFWKd+hBCzGOItUa8lJfNA9AYa1bHyvAqRz6cuoxXiHXqQwgxjyHWGvFSnnLmIiI10ZiceQg5ZdUlPTFy1bfdf4QnXzx17udrrljHwTu2RT+GxJXNPfMQ2rnXk6fP4JzPvc4dPxnsGO2scTvB0M4az84trMIZSUpCXF9FuoswwJMvnuK2+49EPYbEp2LeIaesuqQnRq66uwgXvb5ax5D4VMw75JRVl/QoVy2rScW8Q05ZdUmPctWymlTMO+SUVZf0xMhVX3PFuqFeX61jSHwq5h1mNk+wZ+ckE2OjGDAxNsqenZNDZ2cHHWP3zCS3b91w7i/xETNu37pBaZYGCHF9FTl4x7YLiu6wSZQQx5D4lDMXEamJQTnzUn+Zm9k6M9thZpeG7ZqIiIRQOGnIzNYC3wW+B+w1s+vcfbFHuwPAlcD33H136I7GWiw/hBAbEKRwLiH6UGaCVIzPKfMZKUzmKjNZJ8SktBjXV07XeQr9LFJ4m8XM/hD4X3c/amZfBR5398e62uwEbnb3PzOzbwJ73P2Ffscc9jZL9yL2sPTgqPN+Y5k2MXRvQNDWvi9el3MJ0YeisYj1OWU+o0xfV1uvyTrw1oJedC6xxrxITtd5Cv1sW9FtFnf/cauQXwtsAXpNA5sGHm59/33gw8vsa0+xFssPIcQGBCmcS4g+lJkgFeNzynxGCpO5ykzWCTEpLcb1ldN1nkI/yyh7z9yAW4DXgTd7NLkEaM9JPgWs73GMXWY2b2bzi4sX3KUZKNZi+SGE2IAghXMJ0YcyE6RifE6Zz6jLZK4Qk9JiXF85Xecp9LOMUsXcl9wJPAfc3KPJb4H2zIe39zquu+939yl3nxofHx+qk7EWyw8hxAYEKZxLiD6UmSAV43PKfEZdJnOFmJQW4/rK6TpPoZ9lFBZzM7vLzD7Z+nEMON2j2THO31rZBLwUoG/nxFosP4QQGxCkcC4h+lBmglSMzynzGSlM5iozWSfEpLQY11dO13kK/SyjzBK4+4GHzewvgeeB/zaz3e4+29FmDviJmb0HuAHYGrKTsRbLDyHEBgQpnEuIPpTZzCPG55T5jFgbjwxy8I5thWmWEBuoxLi+crrOU+hnGcEmDbUijDuAJ9z91UFtNWlIRGR4UTancPfXOZ9oqUQdsqBNk0qGN0Q/Yh0jxLnkoknnulLZ7DTUnQVtL/wP6JdfkTK/kxi/txD9iHWMEOeSiyadawjZLLRVlyxok6SS4Q3Rj1jHCHEuuWjSuYaQTTGvSxa0SVLJ8IboR6xjFGnSdd6kcw0hm2Jelyxok6SS4Q3Rj1jHKNKk67xJ5xpCNsW8LlnQJkklwxuiH7GOEeJcctGkcw0hmwegdcmCNkkqGd4Q/Yh1jBDnkosmnWsI2pxCRKQmouTMRXoJsY54rKxxjHXoUznXnPLbqcxTqJqKuayaMjnh7vW3z7qf+7nXWuSrlTUO0Y+iNqmca0757VTmKaQgmwegkp4Q64jHyhrHWIc+lXPNKb+dyjyFFKiYy6oJsY54rKxxjHXoUznXnPLbqcxTSIGKuayaEOuIx8oax1iHPpVzzSm/nco8hRSomMuqCbGOeKyscYx16FM515zy26nMU0iBHoDKqgmxjnisrHGMdehTOdec8tupzFNIgXLmIiI1oZx5Q6WQjQ3Rhx17D/PCa2+c+3njuy/h8c9NR+9HiM9J4XciedI980y1s7EnT5/BOZ+NnTt+slZ96C7kAC+89gY79h6O2o8Qn5PC70TypWKeqRSysSH60F3Ii15frX6E+JwUfieSLxXzTKWQjU2hDzH7EWM9c5F+VMwzlUI2NoU+xOxHjPXMRfpRMc9UCtnYEH3Y+O5Lhnp9tfoR4nNS+J1IvlTMMzWzeYI9OyeZGBvFgImxUfbsnIyanAjRh8c/N31B4R42zRJrLIo+J4XfieRLOXMRkZpQzlxWTYjcdCrZbGXApZ86XBsq5rJsIdZ5LjqG1viWqtXl2tA9c1m2ELnpVLLZyoBLP3W5NlTMZdlC5KZTyWYrAy791OXaUDGXZQuRm04lm60MuPRTl2tDxVyWLURuOpVstjLg0k9drg09AJVlC7HOc9ExtMa3VK0u14Zy5iIiNTEoZ67bLCIiGSi8zWJm7wK+BYwAbwC3uPvvutpcBPxX6wvgs+6+ELiv2Ygx0SaWEBN+UjmXEGbnFvpuCxdLTuMp5ZW5Z34bsNfdHzez+4CPAY92tbkKOOTud4XuYG5iTLSJJcSEn1TOJYTZuQUePHri3M9n3c/9HKug5zSeMpzC2yzuvs/dH2/9OA681qPZVuAmM/upmR1o/aUuPcSYaBNLiAk/qZxLCIeeenmo11dDTuMpwyl9z9zMtgFr3f1oj7efBra7+xZgDXBjj/9+l5nNm9n84uLisjtcdzEm2sQSYsJPKucSwtk+YYJ+r6+GnMZThlOqmJvZOuBrwKf6NHnO3V9pfT8PbOxu4O773X3K3afGx8eX1dkcxJhoE0uICT+pnEsII2ZDvb4achpPGU5hMTezi4FvA/e4+y/6NHvAzDaZ2QgwAzwbrot5iTHRJpYQE35SOZcQbr36sqFeXw05jacMp8y97b8APgR8wcy+APwIWOPusx1t7gUeAgx41N1/ELynmYgx0SaWEBN+UjmXENoPOatMs+Q0njIcTRoSEakJbU6RmJxywCnkqkVExTy6nHLAKeSqRWSJpvNHllMOOIVctYgsUTGPLKcccAq5ahFZomIeWU454BRy1SKyRMU8spxywCnkqkVkiR6ARpZTDjiFXLWILFHOXESkJpQzb6lTvrsufa1LP2PReEhVGlPM65Tvrktf69LPWDQeUqXGPACtU767Ln2tSz9j0XhIlRpTzOuU765LX+vSz1g0HlKlxhTzOuW769LXuvQzFo2HVKkxxbxO+e669LUu/YxF4yFVaswD0Drlu+vS17r0MxaNh1RJOXMRkZpQzlwaL8S668qQS8pUzCV7IdZdV4ZcUteYB6DSXCHWXVeGXFKnYi7ZC7HuujLkkjoVc8leiHXXlSGX1KmYS/ZCrLuuDLmkTg9AJXsh1l1XhlxSp5y5iEhNDMqZ6zaLiEgGVMxFRDKgYi4ikgEVcxGRDKiYi4hkQMVcRCQDKuYiIhlQMRcRyYCKuYhIBjSdP1HaCEFEhlFYzM3sXcC3gBHgDeAWd/9dj3YHgCuB77n77tAdbRJthCAiwypzm+U2YK+7/xHwKvCx7gZmthMYcfdtwPvMbGPYbjaLNkIQkWEV/mXu7vs6fhwHXuvRbBp4uPX994EPAy90NjCzXcAugA0bNiyjq82hjRBEZFilH4Ca2TZgrbsf7fH2JcDJ1vengPXdDdx9v7tPufvU+Pj4sjrbFNoIQUSGVaqYm9k64GvAp/o0+S3QrjRvL3tc6U0bIYjIsAqLrpldDHwbuMfdf9Gn2TGWbq0AbAJeCtK7hprZPMGenZNMjI1iwMTYKHt2Turhp4j0Vbg5hZl9GvgS8GzrpR8Ba9x9tqPNO4GfAD8EbgC2uvuv+x1Tm1OIiAxv0OYUZR6A3gfcV9DmN2Y2DewAvjyokIuISHjBJg25++ucT7SIiEhEelApIpIBFXMRkQyomIuIZKAwzbIqH2q2CPSLOcZwKfCrCj9/GHXpq/oZVl36CfXpaw79fK+795x1WUkxr5qZzfeL96SmLn1VP8OqSz+hPn3NvZ+6zSIikgEVcxGRDDS1mO+vugNDqEtf1c+w6tJPqE9fs+5nI++Zi4jkpql/mYuIZCX7Ym5m683seJ/3LjKzE2Z2uPU1Gbt/dVJmvDSmwzOzfWb28T7vaTyHYGaf7hirZ8zs6z3a5Dmm7p71F/AA8LM+730I+MeK+3cRcAI43Pqa7NPuAHAEmK2wr4XjlcKYdvRlH/DxAe+nMKYfAR5JfTyBT3dco88AX091TDv68jVgKsUxBdYC/wrM9xvLYccz67/Mzew6ljahfrVPk63ATWb2UzM7YGbBFh4bwlXAIXefbn0tdDdIaI/VMuOVwphiZh8Bft/d/6XP+5WPqZmtAe4HXjKzP+7TLInxdPf72tcoS8td39/dJoUx7ejLBLDe3XuttZ3CmP4pcNCX8uTvMLMLcuXDjme2xby1qcYXgbsHNHsa2O7uW4A1wI0x+talzIU1zYV7rFahzHhVPqYli+Q01Y/pJ4F/B74MbDGzz/ZoU/l4diooktNUP6Ztd9J/6e4UxvR/gA+Y2RhwGfByjzbTDDGe2RZzlor4Pnc/PaDNc+7+Suv7eaCKvyTKXFiFe6xGUma8UhjTMkUyhTHdDOx391eBB4GP9miTwnh2GlQkUxhTzOxtLI3l4T5NUhjTfwPeC/wN8B8sjVe3ocYz52K+HbjTzA4DHzSzb/Ro84CZbTKzEWCG87spxVTmwkplj9Uy45XCmJYpkimM6X8C72t9P0Xv9YpSGE+gVJFMYUxh6TnEU9666dxDCmP698Bfu/u9wM+AP+/RZqjxzLaYu/u1Hff4ngH2mtnurmb3svSA9BngiLv/IGonl5S5sFLZY/Ut4wX8MtExLVMkUxjTA8BHzewJ4DPAdxIdz7aiIpnCmAJcDzwBYGZXJjqma4HJ1v/3VwO9xnSo8dSkoYqZ2QeAhwADHgUOAp/wFeyx2nRm9g7gmyz9s3QN8HfADRrTlTGzLwHz7v6ImV2JrtNlM7MtwD+zdKvlCPC3wJ+sZDxVzGvCzNaytMfqE63bB7JCGtPwNKZhDTOeKuYiIhnI9p65iEiTqJiLiGRAxVxEJAMq5iIiGVAxFxHJwP8DW2mIv5dygyEAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# ③ \n",
    "import matplotlib.pyplot as plt \n",
    "plt.scatter(data[: , 0], data[: , 1]) \n",
    "plt.show() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "8cf6d448",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       1.00      1.00      1.00        10\n",
      "           1       0.89      0.89      0.89         9\n",
      "           2       0.91      0.91      0.91        11\n",
      "\n",
      "    accuracy                           0.93        30\n",
      "   macro avg       0.93      0.93      0.93        30\n",
      "weighted avg       0.93      0.93      0.93        30\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# ④ \n",
    "from sklearn.model_selection import train_test_split \n",
    "x_train, x_test, y_train, y_test = train_test_split(data, label, test_size=0.2) \n",
    "# ⑤ \n",
    "from sklearn.preprocessing import MinMaxScaler \n",
    "scaler = MinMaxScaler().fit(x_train) \n",
    "scaler_x_train = scaler.transform(x_train) \n",
    "scaler_x_test = scaler.transform(x_test)\n",
    "# ⑤ \n",
    "from sklearn.preprocessing import MinMaxScaler \n",
    "scaler = MinMaxScaler().fit(x_train) \n",
    "scaler_x_train = scaler.transform(x_train) \n",
    "scaler_x_test = scaler.transform(x_test)\n",
    "# ⑥ \n",
    "from sklearn.tree import DecisionTreeClassifier \n",
    "model = DecisionTreeClassifier() \n",
    "model.fit(scaler_x_train, y_train) \n",
    "# ⑦\n",
    "from sklearn.metrics import classification_report # 模型评估 \n",
    "pre =model.predict(scaler_x_test) \n",
    "res = classification_report(y_test, pre) \n",
    "print(res) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "4445d157",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1、(15.0分)NumPy是用于数据科学计算的库。请使用NumPy库，\n",
    "# 编写Python代码求解各坐标点之间的距离。\n",
    "# 1启动Jupyter Notebook创建一个Notebook。（2分）\n",
    "# 2随机生成100个二维坐标点，并储存至数组arr1。（4分）\n",
    "# 3计算各样本点之间的欧氏距离，并储存至数组arr2。（4分）\n",
    "# 4将数组arr2的形状转换为（100, 100）。（3分）\n",
    "# 5将数组arr2存储为当前工作路径下的一个二进制格式的文件arr2.npy。（2分）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "57a515ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ① \n",
    "import numpy as np \n",
    "# ② \n",
    "x = np.linspace(0, 10, 100) \n",
    "y = np.linspace(10, 20, 100) \n",
    "arr1 = np.array((x, y)).T # 数组arr1形状为（100, 2） \n",
    "# ③ \n",
    "arr2 = [] \n",
    "for i in range(len(arr1)): \n",
    "    for j in range(len(arr1)): \n",
    "        a = np.sqrt((arr1[i, 0] - arr1[j, 0]) ** 2 + (arr1[i, 1] - arr1[j, 1]) ** 2) \n",
    "        arr2.append(a) \n",
    "arr2 = np.array(arr2) \n",
    "# ④ \n",
    "arr2 = arr2.reshape(100, 100) \n",
    "# ⑤ \n",
    "np.save('arr2.npy',arr2) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "5ba59ac5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2、(20.0分)用户用电量数据探索与分析。\n",
    "# data数据集为用户用电量数据，其中包含3个字段。具体特征说明如下：\n",
    "# 请根据用户用电量数据，编写Python代码完成下列操作：\n",
    "# 1读取数据文件data.csv，并储存为数据框data。（2分）\n",
    "# 2将数据框data转换为行索引为用户编号、列索引为时间、值为用户用电量的数据透视表data_new。（2分）\n",
    "# 3采用四分位法对透视表data_new中的异常数据进行识别并处理。（4分）\n",
    "# 4构造特征1：统计每个用户用电数据的基本统计量（基本统计量包括最大值、最小值、均值、中位数），并将结果储存至数据框feature1。（2分）\n",
    "# 5构造特征2：将每个用户用电数据按日差分后，求取基本统计量（基本统计量包括最大值、最小值、均值、中位数），将结果储存至数据框feature2。（2分）\n",
    "# 6构造特征3：求每个用户的5%分位数，并将结果储存至数据框feature3。（2分）\n",
    "# 7构造特征4：统计每个用户的日用电量在其最大值0.9倍以上的次数，并将结果储存至数据框feature4。（2分）\n",
    "# 8合并特征1、特征2、特征3和特征4，并储存至数据框feature。（2分）\n",
    "# 9将数据框feature保存到当前工作路径下并命名为feature.csv。（2分）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "0e1f084c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ①\n",
    "import pandas as pd \n",
    "data = pd.read_csv('data.csv', parse_dates=['DATA_DATE'], encoding='gbk') \n",
    "# ② \n",
    "data_new = pd.pivot_table(data=data, values='KWH', index='CONS_NO', columns='DATA_DATE') \n",
    "# ③ \n",
    "def clear_(x=None): \n",
    "    QL = x.quantile(0.25)   # 下四分位数 \n",
    "    QU = x.quantile(0.75)    # 上四分位数 \n",
    "    IQR = QU - QL   # 四分位距 \n",
    "    x[((x > QU + 1.5 * IQR) | (x < QL - 1.5 * IQR))] = None \n",
    "    return x \n",
    "data_new.apply(clear_, axis=0) \n",
    "# ④ \n",
    "feature1 = data_new.agg(['max', 'min', 'mean', 'median'], axis=1) \n",
    "# ⑤ \n",
    "feature2 = data_new.diff(axis=1).agg(['max', 'min', 'mean', 'median'], axis=1) \n",
    "# ⑥ \n",
    "feature3 = data_new.quantile(0.05, axis=1) \n",
    "# ⑦ \n",
    "feature4 = data_new.apply(lambda x: sum(x > x.max() * 0.9), axis=1) \n",
    "# ⑧ \n",
    "feature = pd.concat([feature1, feature2, feature3, feature4], axis=0) \n",
    "# ⑨ \n",
    "feature.to_csv('feature.csv') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "ca2c1184",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3、(25.0分)泰坦尼克号是英国白星航运公司下辖的一艘奥林匹克级 HYPERLINK \"https://baike.baidu.com/item/%E6%B8%B8%E8%BD%AE/629860\" \\t \"_blank\" 游轮，1909年在哈兰德与沃尔夫造船厂动工建造，1911年下水，1912年完工试航。\n",
    "# 泰坦尼克号是当时世界上体积最庞大、内部设施最豪华的客运轮船，有“永不沉没”的美誉。然而不幸的是，在它的处女航中，泰坦尼克号便遭厄运。1912年4月14日，泰坦尼克号与一座冰山相撞，造成右舷船艏至船中部破裂，五间水密舱进水。4月15日，泰坦尼克船体断裂成两截后沉入大西洋底3700米处。2224名船员及乘客中，1517人丧生。\n",
    "# 经过探究发现，似乎有些人比其他人更有生存的可能。接下来通过其生存与遇难的人的数据，预测乘客生还人数。\n",
    "# 数据文件为titanic.csv，具体特征说明如下：\n",
    "\n",
    "# 请根据数据，编写Python代码完成下列要求：\n",
    "# （1）读取数据文件titanic.csv，并储存为数据框titanic。（1分）\n",
    "# （2）计算乘客生还人数，并输出其结果。（2分）\n",
    "# （3）绘制男女乘客比例饼图，并添加标题“男女乘客比例饼图”。（3分）\n",
    "# （4）绘制船票价格直方图，并添加x轴标题“船票价格”和y轴标题“频次”。（注意：需先对“船票价格”进行升序排序。）（3分）\n",
    "# （5）在数据框titanic中，新增一列为家庭人数，并将列名设置为“familysize”。（2分）\n",
    "# （6）修改数据框titanic的“Sex”列，使用数值“1”和“0”分别代替“Sex”列中的“male”和“female”。（2分）\n",
    "# （7）根据“Pclass”、“Sex”和“familysize”这三个特征预测乘客是否生还。将数据集划分成训练集和测试集两部分，训练集和测试集样本数比例为8:2。（4分）\n",
    "# （8）构建KNN模型，命名为clf，并进行模型训练。（4分）\n",
    "# （9）使用测试集数据进行模型预测，并将结果储存至数组pre。（2分）\n",
    "# （10）对构建的模型进行性能评估，并输出其结果。（性能评估包括计算精确率、召回率和F1值等分类评估指标。）（2分）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "ba19923a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAO4AAAD3CAYAAAAaN2hxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAcfUlEQVR4nO3deZgU1b3/8fd3ZpDdBkRFA1qKXhRXXBJFo2jEiB2NGGOuuTH6XI2ioibRxDLGpH/xem2jN0Zc4ha3GKJxi0q572sCATfcQEILkUU2B5iBWXrO749TI+0wPczS3aeq+/t6nn5mpruq69vQnz6nlj5HjDEopeKlynUBSqmu0+AqFUMaXKViSIOrVAxpcJWKIQ2uUjGkwS0wETlaRB4XEdnEclUi0ltEhonI7uF6k0XkZhG5ZhPrXiAiv+9iXY0i0qeDxyeKyFkdPH6JiGyd8/fhIlIT/l4tIqeJSFX498kiMqwr9amuqXFdQFSJyHjgIeCj8K4tgf5AJvzbAy4yxvyxzarjgOmmgxPkIjIKeBxYDawBhgLzgSeBR4GMiIgxxoRheA/I/SDYHBgsIke1eeqjjDEZEbkVeN0Yc0fOY41AUwcv+YfAIyLyLLAn8CHwrjHmnNbnBu4I698KuAfYBVhtjMmKyA+B1SLyNyANPJXzemdh32uNHWx/hDFm6w4eVzk0uPmtB2YaY8YBiMgk4ABjzKnh33fS/htxIjAh35OGQfyXMWZkzn2TgbXGmDtz7uslIlljTIuIjDLGVIWt+F+Aq40x/2zzvG+xoQfVXkibjTHZcNkdgX2NMfeHf28HHAacBPwVeMsYc0j42N+B04Es0BI+1w+ABcDZIrIKuB24FNgifO33Aytztt0EHB9+qIwNlz269cMtbLkz+f7N1MY0uPkZYF8RaQ3IlkD/nL894FkRGQ48A9Rj/z23A+7N6SkPAOqA3kAS22rfISK5wdoGyIrI6Tn3bQb8QEQ+Bs4O77sE+B7wHyLSGiKDbS1/ASzPuW8jItIfOA84B7gu56FfYz841ovIPkCziPwAmIvtEdTlPMeAcP1fA7sDo4FDgcOBdeHyOwDjRGScMeZzwg8REUkAt4bLzRCRXYCLgJuB5vZqVu3T4OYnbLrFFWPMv4Fdw/v+B/irMeby8O9RwHPArrldZxE52BjTJCJTsa3PBGBt+PAWwDyg0RgzJ1z+5vC5JwN3AmdgW7/7gIXGmA+AD9rUP0VErsR+cEwO73sH22LvaYxZGT73YcAR2B4G2B7DDOAAYDdsS5v7QXACcCPwGfAsUGeMeSOs72PgReAeY8zBbeqpBh7DtsSvAK0t+R/QYy1dpsHNrx5ARF4M/64CWnL+hpzuoIj0wraGY3MePxJ4qJ393b+LSCOwM7alzDUZ24JfkXPfV4D9wuUnAdOBhUAtcEGe+s8zxtyTU9912MDWtVluMfaD4FoR+QpwGja0E7Et5Zg2y9+NDfJiYDg22O0SkSpjTGvPIAuci+0VvAr8GEiGuwIa3C7S4Ob3TmtrCyAiS7EHf97Ms/y3gX7AwSLyKPbNPQn4fjvLfhfbbb4Q+4FwDLY7+ihwmzHmchG5V0TON8YsDVv1o8J904HYrvVXgDeAE0XkdWPMgk29oHZCizHmQ+DDsGt/MLbb+jC2x3EGcGyb5VvCf49mY0yziHw7bNmHhq+hDthORN4BpmIPVAGMAFLh71OB14AzReQWNvQIVCdpcPN7WERGYANYhX1j3tFm33WtMWZvAGPMAyKyABvWq4B/AO8ZY97OfVIRGQjchj0INA3bsu6Bbb0nAr1F5FXgQ2PMUhE5CDgT2+K2APdi9y1XA8djW/mbRaQ38GNjzE1ttlcNDOrMCzbG3Beu8yC2GzwT6IPt5m4k3GddZIzZRUQeAG7Cdpfb6yovxO6LLwJOBv4XuCV8betRXaLBzcMY863W30XkYOC3xpixOfd9G/jvNutMB6aLyE+BnwNNYWt0tTFmWbjMGuBwERkKfAMbwCux3d49gX2As40x74RPuxx7xPUW7P7yceGt1UDgBeB/sEd6vyAix2I/GK5q+/pE5ERgmjGmvp2Xfxq25Tcicgbw73aW6Yvdn71ZROZgdxHOwJ6q2mhz4Wv/VEQuAr4DPAi8ZIypDY8qd3jeW7VhjNFbBzdgR+xBnePb3H8ucG34exX2nOYkbEv7MLYrmwAuwx7IuSRcdgj2gNRs4L/C+84D/jv8/VjsOdTngR1ztjcIyLRT3zjgz23uexrbgj8NHBjetwIYFv4+EnvKaGjOOh9jw/MT4H2gf3j/gPC1vQcMCe/bDmjAdvH7YXsOV4WPecCrbeqZDng5f28Vbuf48O8a4FPX/9dxummL246wOzsZ+DqwE3CZMeah8LH+wPnAqdgAApyFPdr6CnCqsUd5W10qIndgT5mA3QdcCHzVbGjtehOeEzbGPCoij2ED96+c59kMGBZ2o3Ml2Pgc6L3A74wxT+bcdzPwfHggqAW41BizPOfxgcD22DB+02zYH67CHix7yYRHoo0xC0RkL2PMhyJyN7bXcHG4fK/wlqsGeDw8IPclIvKrnGVUJ0n4iafaCM9jzsdegWTaPHYasMAY80wJ6xGgr2m/a+tM2M1tMRuOHre3zBbA5ya8AET1nAZXqRjS82dKxZAGV6kY0uAqFUMaXKViSIOrVAxpcJWKIQ2uUjGkwVUqhjS4SsWQBlepGNLgKhVDGlylYkiDq1QMaXCViiENrlIxpMFVKoY0uErFkAZXqRjS4CoVQxpcpWJIg6tUDGlwlYohDa5SMaTBVSqGdNqHiPP8oA92Pp4dcn7ugJ2bqDd2apJewGaDWV3/Zp9JCexctFnsHL8r27l9ip0raB6p2qUlfDmqQDS4EeH5wU7Y+Wl34sshHUYnZ7ITG8ohXdpwKrEGmAfMBd7FTtA1nVTtqi49jyopnYLEEc8PtgUOx061eTh2BrweGcLqlbP6TOpacPP7GBvifwDPk6qdXaDnVQWgwS0Rzw8GA4exIay7FHobBQ5uWwuBJ4EngGdJ1a4p0nZUJ2hwi8jzg+2wk19/CxhDkQ8GFjm4uZqwU4reC9xPqvbzEmxT5dDgFpjnB9VAEjgTOIoSHrkvYXBzNQABcA8QkKrdaA5cVXga3ALx/GAEcDq2hR3uogZHwc21CvgTMIVU7TyHdZQ9DW4PhK3r0WxoXatd1hOB4LZqAaYB15CqfdFxLWVJg9sNnh8MBM7HBtZJ69qeCAU315vANcBUUrU6I32BaHC7wPOD3sA5wMXAUMflbCSiwW31EfAr7MEsfdP1kAa3E8Iu8SlAChjhtpr8Ih7cVm8Cl5CqfcJ1IXGm1ypvgucH3wDeBv5IhEMbI2OAx0klXiaV2N91MXGlLW4enh94wO+AiY5L6bSYtLi5WoDbgItJ1a50XUycaHDb8PygL3Yf9mdAH8fldEkMg9tqOfBTUrV/cl1IXGhXOYfnB3sAM4FLiVloY24ocDepxNOkEp7rYuJAgxvy/OBM7AX1u7qupYKNB94ilTjJdSFRV/Ff6/P8IAHcApzouhYFQAKYSioxAThHv8zQvopucT0/2B97ekJDGz0nA2+SSnzVdSFRVJHB9fxAPD+4AHgN+2V1FU0jgddIJc5zXUjUVFxwPT8Yir2O9mrskC8q2mqAa0klbiGV0P+vUEUF1/OD/YC3sF8MUPHyI+AZUoktXBcSBRUTXM8PDgaeww6ypuLpUGAGqcRurgtxrSKC6/nBeOApYHPXtage2wF4nVTiYNeFuFT2wfX84BjgMaCf61pUwWwOPEUqcYTrQlwp6+B6fnAi8BB2/GFVXvoB00gljnFdiAtlG1zPD04FpqIXmZSz3sBDpBLfc11IqZVlcD0/OBu4HcdDyaiSqMFeaVVR4S274Hp+8DPgBjo5+r8qC1XYLymMd11IqZRVcMMvCvzWdR3Kic2w3eaK+HJ+2QTX84OvA9e5rkM5NQA7usYo14UUW1kENxzT+AH0EkZlv9v7NKnENq4LKabYBzccseJvwFaOS1HRsR3wIKnEZq4LKZbYBxc7iNs+rotQkXMgMMV1EcUS6+CGR5B1tASVz5mkEqe5LqIYYhtczw++CaRd16Ei74Zy/DJ+LIPr+cHO2CkeY1m/Kqne2P3dwa4LKaTYvfE9P+iPPRg1yG0lKkaGYy/KKRuxCy5wBTDadREqdk4ilSibscViFVzPD8ZiJ91SqjtuLJfzu7EJbjhT3m3EqGYVOVtg30OxF6cQXIoOVq567mhSiVNdF9FTsQiu5we7AT93XYcqG1eSSiS6soKIpETkAxF5MbxN7mkRIvJid9eNy5fMp6DXIavC2Qo71/FPurje5caYewpfTtdFvsX1/OAE4HDXdaiyM7kno0WKSD8ReUBEXhaRG8L7ZorIEyLyiIj8Q0Qmici2IvKqiLwiIpd38Hxbh+u+LiIXb2r7kQ6u5wf9gP9zXYcqSzV0/VrmS8Ju8o3AGcBsY8whwDYisid2HKzvAnsC3we+hh0O2AcmAB2Nj3UxcJ8xZixwnIh0OH50pIOLfcHbuS5Cla3DSSWO78LylxtjxhljzgZGARPD/dQdsQFdaoxZC3wCZLGjsDRj38e3AQM7eO5RwFnh8/UHtu2okMgG1/ODLYELXdehyt5lpBLdycFHwO+NMeOAXwIL8iz3U+xFQ6cDHc0i/xHgh8+XBlZ2tPHIBhd7oUVf10Wosjca263tqluBCSLyMjAJWJhnuWnATcCjQL2I5JtJIw1cKCKvAUcBSzvauBjT0YeAG+GX4xdgRzNQnTSE1Stn9Zk0xHUdMTQX2JVUbdZ1IZ0V1Rb3VDS0qnR2BmI1vGvkguv5QRV2v0CpUvoFqURshvSNXHCB44CdXBehKs5uwJGui+isKAZXjyQrV2LzzbNIBTf82t6BrutQFStJKrG96yI6I1LBRVtb5VYV9tRO5EUmuOE4Ut92XYeqeKeRSkR+WtbIBBf4EdGqR1WmLYGuXAbpRJSCcpzrApQKRX6s7kgE1/ODXbEnwZWKgiO7+kX7UotEcNF927K0cp3hmXnNLK9vKdhzLl3bQlO26Jfp9ibi70kNriqKVesM35paz/RPsxx2Vz3L6lrY7po1jLuzjnF31vHu0vYvC65db5jw5zqO/FMdE++rpzFruH56I/vfupa6RsNT85rpVV2SC5wiPZSr8y8ZeH4wDFiEziDfY1H6ksFLmWZ618ABw2u48On1bNlPWLnOcOX4Ph2ud+OMRnYeUsX4kTWcNW0dE3au4dGPmtl/22pGDa1iRb3hO6NLMopRE7AVqdrPS7GxropCi3sMGtqyc6hXwwHDa3j5k2amf5qlby+YNreZr966ltMeWUdzS/sNxtn7b8b4kXYotGX1hq36C8ZAUws8Pa+ZCTuXbJi0XsDRpdpYV0UhuNpNLlPGGO6b3cTgvsKYYdU8e3I/pv9oAE0t8Pjc5g7XfWNhM6vWGw4YXsORI2uYNqeJ4ZtXcexf6nlhfsfrFtA3SrWhrnIa3HAeoMj+46ieERFuSPZlz62qWLTGsM1A+3bbb9sq5q7If8Bq5TrDuU+s5/Zj7TgK39u9F6lxvRnUR0juXMODHzSVpH4i/N503eJ+E+h4p0fF0pWvNnD3240AfL7eMClYx9tLsmRbDH/7sJm9hlW3u15j1vDd++u54ht92H7Qhrfn3BUtjBws9K4R8vSyi2F7UomRJdtaF7gO7rGOt6+K5Ix9N+NP7zRxyB11ZA28fGp/Tn54HXvfXMeBw6s5Ysca3l+W5ZfPr//Sen+c1cSsxVkuf6WBcXfWcd/sJlY3GIYNqGL0ltXcMrORI3Ys6XDgkWx1nR5V9vzgfXRakYKJ0lHlMvJXUrWRGx3DWYsbTuKlV0upqIvk10xddpV3IT5ToKjKNYJUosPByV1wGdw9HG5bqa7Y23UBbWlwldq0Ma4LaMtlcHd3uG2lukKDm0NbXBUXe7suoC0nwfX8IAGMcLFtpbphZNTGXHbV4mo3WcVJb2Br10XkchVc7SaruInUsK2ugjva0XaV6q5IzdPsKriR6nYo1QkaXGCwo+0q1V0aXDS4Kn4iddmjBlepzhnguoBcGlylOmeg6wJyuQpupD69lOoEDS6wmaPtKtVdkWpsSh5czw80tCqOKr7F1eCqOKr4a5U1uEWyigGDnszu95IxFG6yHtWq0XUBuVwEV4erKRJDVdWkpp8eOrnpvDdbjCxzXU+ZKdlgzp3hIrhrHGyzogQtB+w7tuG6lpVm4FuuaykjlR3cTDq5Dlhb6u1WmiUM2Xq/hj/soV3ngqn4rjKAduNKoIWqau06F0xlt7ghfROVkHadC2K16wJyaXArhHadeyxS71kNbgXRrnOPLHddQC4NbgXSrnO3LHVdQC4NboXSrnOXLXJdQC5Xwf3M0XZVDu06d4kGF21xI0W7zp3ysesCcmmLqwDtOm/CemC+6yJyuQru+0DW0bZVHtp1zmsOqdpIfZg5CW4mnawH3nOxbbVprV3nVWbAW65riYgPXBfQlstJv/7hcNtqE5YwZOt9G27SrrOlwc0x3eG2VSdo1/kLGtwcGtyY0K4zM1wX0JbL4L4H1DncvuqCCu46LyJVG6kjyuAwuJl0MgvMdLX9QsjWrcJkm12XUTIV2nV+zXUB7XHZ4kKJu8srnr6R+o83HBPL1q1i0R3n5V0+u34tS+//NYvv+jErnrwegNUzH2PxXT+hpXE96+bPQqorbySeCus6v+q6gPa4Dm7JjiyvXzibbN0q+u30tS/uW/XC7Zjm/AMb1M1+nv6jx7HNKb+npXEdDYvn0vTZfAbsdSSNS+YgvfqUovRIqqCuswa3HSVpcU22mRVPXk/N5ltRP/fvAKz75G2kV2+q+w/Ku15V34E0Lf+ElvVrya5ZRs3mQzHGYLLNrJv/Jn133LcU5UdWBXSd1wBvuy6iPU6Dm0knFwD/LvZ21s5+nl5DR5D42gk0LJ7D6n8+Qu3r9zL40FM7XK/P8N1orl3G6pmP0WuLEVT1GUjfHcawbt4MagYOZdmDl7H+k3eKXX7klXHX+WlStZG8ws91iwvwQLE30PTZPAbudRTVAwYzYPRhrHruNgaOSVLVp+NZJT5/bSpbfPMcBh10Er2GDGftu8/Sf9dDGHTQ96nq05++I/enfk4kj12UXJl2nR91XUA+UQjuX4q9gZpB29L8+RIAGpbMpddWO7Bm1jSWTPVp/Gw+K56Y0u56LevX0rgsg2nJ0rB4zhf3N61aRM2gbZDqXhhjil1+bJRZ1zkLBK6LyEei8Mbz/GAusFOxnr+loZ4VT1xLtu5zTEszWx53MTUDhwKwZKrPsO+naVy+gLr3X2LwISd/sV7Doo9Y8fi1NK/+jN7b7sKWx18CxtCw6CN6bzuKJVMvYtDYk+g3amyxSo+tYaxc+kRvf/FgWbu361q66SVSteNcF5FPVIJ7GfBL13WowqqiJfuHXte8emTVzK+LRKJ31xUXkKr9nesi8onKP+ZU1wWowmuhqvrMpgvi2HU2wMOui+hIJFpcAM8P3gL2cl2HKo6YdZ0j3U2G6LS4oK1uWWs96vxUdt84HHW+3XUBmxKl4N6L7aKoMhWTrvNqSnCKsqciE9zwYgw9KVoBIn7Bxr2kautdF7EpkQluSLvLFSLCXec/ui6gM6IW3D8Dta6LUKURwa7zDFK1sRjgIVLBzaSTq4EbXdehSitCXecrHW+/0yIV3NA1wDrXRajSikDXeQ4RP3ebK3LBzaSTy4jJfoYqLMdd56ujNnZyRyIX3NBVRGwGcFU6DrrOS4C7S7StguhScEXkThH5a/j7vSJyZwfLvtjdosJTQ3d0d30VfyXuOv8fqdqGIm+joLrT4u7V5mex/AY7Z4uqUK1d53Obzn2zxUixJpZeAFxfpOcumu4Et1FEtsB2ZQeIyJMi8oqI5G0hRaSfiDwgIi+LyA2d2UgmnfwU6NSyqrxNazlw34MapjQXqet8Cana2DUQ3Qnu28D3wp8NwHXAEYAnIlvnWecMYLYx5hBgGxHZs5PbugJ7CZqqcIvZYlgRus6zsNcOxE53gjsLODX82QScjn3xQ4C+edYZBUwM93t3BL7SmQ1l0skVwNXdqFGVoSJ0nS8kVRvL6+O7G9z9w5/V2AuyT6LjWQk+An5vjBmH/cL8gi5s76pwfaWAgnWdp5GqfaFQNZVad4KbwZ6s/gTbcl4MPB8+lq8lvRWYICIvA5OAhZ3eWDq5HjgFnU9X5ehh17kOmFyMukolMl+k3xTPD64Efu66DhU936p6Y+aUXtdvXyVmaCdX+TGp2muLWlSRRfUCjPb8CjuTvVJf0sWu89+xB1RjLTbBzaSTDdguc+XMsqU6rZNd50bgtDhd2phPbIILkEkn/wn81nUdKpo6cdT5f0nVlkWvLVbBDf0/YLbrIlR05ek6vwFc7qikgovNwalcnh/sg53pr/LmuFSdljOu8x4i7EOq9hPXNRVKLIML4PnBb4BLXdehIs/sLx8ec/8VF0R2OpHuiGNXudVvgGdcF6Ei77flFlqIcXAz6WQzcAKg81yqfJ4DLnFdRDHEtqvcyvOD4dhzc526/llVjAXAvpl0slhfB3Qqti1uq0w6+W/gaPRbRGqDeuCEcg0tlEFwATLp5DvAd9GLM5QdfOHYTDo5w3UhxVQWwQXIpJNPA2e6rkM51Qgcn0knn3NdSLGVTXABMunk7cBlrutQTjQBJ2bSySdcF1IKsT841R7PD+4Cfui6DlUyWeA/M+lk5CfrKpSyanFznA484roIVRItwCmVFFoo0+Bm0skm4DvATa5rUUVlgNMz6WQsx43qibLsKufy/OAXlNHF5epLzsqkkxX54Vz2wQXw/OAU7PA5vVzXogoiC5yfSScrdvjeiggugOcHR2IHthvouhbVIyuxB6Iq+jr1igkugOcHY4DHgWGua1Hd8jYwMZNOznddiGtleXAqn0w6+SZwIDrcaxz9BRirobUqqsVt5fnBEOBvwNcdl6I2LQv8PJNO/s51IVFSUS1uq0w6uRI4DDsMjo7XHF3LgfEa2o1VZIuby/ODsdgpVDzHpagvm4Xdn+3KrBcVoyJb3FyZdPJ17JShFXcSP6JagBuBgzS0+VV8i5vL84OJ2DeNHnV2YwZwdjgMr+pAxbe4uTLp5MPAaOBOx6VUmlXA2cABGtrO0RY3D88PxgO3oPu+xWSAu7BHjZe5LiZONLgd8PygD3AWcBGQb9Ju1T3vYrvFr7ouJI40uJ3g+UE/7LSMPwM6OyOcat9a4NfAlHCkTtUNGtwu8PxgAHA+cAEw2HE5cbMWu+txdSadXOy6mLjT4HaD5webAz8JbwnH5UTdCmAKcH144YsqAA1uD3h+MBjb+p6HfuuorXnYeWhvy6STda6LKTca3ALw/GAgcDzwA+BwKvc0WwvwJHA98GQmndQ3V5FocAvM84NtgJOwIR7juJxSWQjcB/whk07+y3UxlUCDW0SeH4wG/iu8be+4nEIy2KucHgMey6STbzuup+JocEvA8wMBDsYG+AhgpNuKuqUOeBYb1iCTTi5xXE9F0+A64PnBlsAB4e1AYH9ggNOiNmaA+cBT2LC+kEkn17stSbXS4EaA5wfVwO5sCPKBwM6AlGDzBvgEeB94L+fnB5l0cm0Jtq+6QYMbUZ4fDMJeJz0CGB7eRmC/uTQkvA3GnkfODXgTdra6dW1+tv5ehz1VkxtQPV0TMxrcmPP8oAob3mZgnV5GWBk0uErFUKVeKKBUrGlwlYohDa5SMaTBVSqGNLhKxZAGV6kY0uAqFUMaXKViSIOrVAxpcJWKIQ2uUjGkwVUqhjS4SsWQBlepGNLgKhVDGlylYkiDq1QMaXCViiENrlIxpMFVKoY0uErFkAZXqRjS4CoVQxpcpWJIg6tUDGlwlYohDa5SMfT/ASEFl+KIfCglAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# ①\n",
    "import numpy as np \n",
    "import pandas as pd \n",
    "titanic = pd.read_csv('titanic.csv') \n",
    "# ② \n",
    "titanic['Survived'].sum() \n",
    "# ③ \n",
    "import matplotlib.pyplot as plt \n",
    "plt.rcParams['font.sans-serif'] = 'SimHei' # 正常显示中文 \n",
    "plt.rcParams['axes.unicode_minus'] = False # 正常显示符号 \n",
    "sex_ = titanic['Sex'].value_counts() \n",
    "plt.pie(sex_.values, labels=['Male', 'Female'], autopct='%1.1f%%', startangle=90) \n",
    "plt.title('男女乘客比例饼图') \n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "4c1327a8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAETCAYAAAA/NdFSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAZ4ElEQVR4nO3df5hdVX3v8feHmQRDUiAhYzCFMU1N9bFCkE5pgrnpQBNtAL3c2AqtFVt+BChUffBpE0osPyo0UqRaFGq8UYHboIEWivwyYAnEGpRJ0YCtFtoGMPxoaGLSiAQl3/vHWkMOZ85MQsg+M+esz+t55pmz19nn7LVgsj9nrbX3OooIzMysPPsMdwXMzGx4OADMzArlADAzK5QDwMysUA4AM7NCOQDMzArVOdwVMNtdkn4ZODcizh5in58DFgM/HeqtgNHAZRGxOb/uAOBm4Dcj4kVJXcCngA9GxM8aHOcsYFJEXJy3PwT8OCKW7UY79gO2R8RLu7HvPsDsiFi1q33NXi0HgLUESRcBpwKjJP0qcDEwCzgJeA6YApwC3A3cxM4A+CxwKfCLwDuAJaQA2Bd4vuYQJwJPR8SLeftDwDzg14B/alClF4DtNdsfBK6VNCtvPxkRj+e63w3sX1OnacDzkjbk7VH52Cc2OM5+wJclvScivt3geST9M+nf8ouNns8OjYhJQzxvBXIAWKv4eeBM4Cng/wA/l8svjIhrJX0JeDEifippOvCnwFZgKnAF8Lr8mreQTsYXR8QDNe//R6STfn9P43igB7hJ0vsi4rH+HSXtSwoRJI0mBcsU4Kj88xvAB4DHASJibm1DJN2V6/2t+kbmT/yd/UEUEdsknVfT3v79OoEdEbGDFCzzI2K9pKOBjwHHRb7LM++7fqj/uFYmB4C1ipdIwzb/F7gd2AEcAnyjZp99an5fFxEX5ZPtWcDbSMM750r6ONDR/yJJJwG/Ajwl6Y3Al4HTIuI/JJ0BfE3Swoi4KQ8xrQEmAz8B3k4KmaeAK4FngZsj4p7ayks6BTgvb04DviSpvwdxXURcmR//KvD/JL1E+vQ/HtiQ36P2LTuA+cDD5J5FHsb6fK7Xg5LeAiwEPgcMGMYycwBYK9kC/A3QDRwK/Drw4fzcM8BSSVeTPu0ukHRCfu6m/jeQ1JcfXpa3JwOfAH5AOjF/DrgIeDrv9xzwx8DFkrZExN3A2yStA+4iDTHNAVYCNwLbyD2JOgcCK4DL68pPAX6pfyP3Cqblus0BFkTE+3b1H4YUCF8FNgGrgQeA04Fr8MUeNggHgI14eYJ1PjCbNKxzCGlYZ1FEPJd3+1PgOtIJex/SkM9gY+Kj2TmuPym/1wnAY8Dc/Pj9pPmCeaTAmZ6HW5B0BGlO4TjSMMwiSb2kXsmBwJGSvh8RP6o55g5gQX5NrdeTgqGRscC7JH2/rvyxiDihruwl0jDWc6Re0UeA4yNiRx5WMhvAAWCt4PPAkaQT5ZOkid9ngNmS/pg0BCLgfyKiV9LBeXswLz8XEQ8BD+XeQkTEv0saQxrSgTR08mL/yT9bCCzLdeiUtBZYRQqO/wHOAfokfSQibqt53UPA1+rqMnOIenYDV0TEn79c8TTJfHGDfQ8l9VwAlpMC7kxJS4FzhziGFcwBYCNeRPxE0jjSJ/urSCdbSJ+2PxwRqyQdCHxT0u+QTni1l4F25tfW9gjOknRTRHyywSEPAr7TqC6SZpCGW/4ZOBj4S9IY/aWkK4YgnbivBu6oe/l/k3oZtaY2Ok42B6iv38HsHJ6q9SRpOOkp0gT0ZcBS0sT5C0McwwrmALBW8TbgHuANpCEggPq1zHdExA3ADZL2B349Ir4q6feBIyLiI/kS0u9FxPMMrhf49CDPrSMNtcyDl8NpK3BbRJwOL1+y+kJdrwHgN4Ej6soOAv62/iCSjgXeSBrPrzUJ+GH97rkuGyQtBN4L/B1wX0RsyVcBDdUjskJ5bNBGPElvJd049QLwfeCdpCtdBHwuT+zeS/57lvQu0jj4DKVLZ2pPfh8A/k3SOfnE2G9UeqnOAzZERP9J9hUnzoh4PiKera8icIKkb0j6Bul+hf7LRDsljcr7fSYiemp/yMM5eb/O/PhNwBeAM2ou5RwnqZt0CWx9L6L//YmIT5CGorYBb67Zxx/2bAD/UVgreAvwlfx4PKkncCfpZH5mzRBQn6TXkU6Ap0ZEX7788s+BSwAi4kOSriF9iq/9ADSKNLF7MvDumvJ980+9Uez899PJwB5A/0n5fwMXkoefJP3WIG18N3CFpDuAW4A/iIgHa57/OOnk/48MnDTuBO6QNGDSW9Kf1exj9gryN4JZK5CkqPtjzSf7nw61pIKk8cCBEfGfe7k++wH75Bu1OoGOiNi+q9ft5nuPrrkjeXf2Pwj40e4sLWFWywFgZlaoyuYAJE2QNFfSxKqOYWZme66SAMjd7ttI66LcK6lL0hOSVuWfw/J+yyStkbS4inqYmdngqpoYOhw4LyIeyGFwKnBDRCzs30HSfNK46UxJX5A0LSIeHepNJ06cGFOmTKmoymZm7Wnt2rXPRURXfXklARAR9wFImk3qBdxEukzuGNLiVWeSrrXuv5phJWlp3wEBIGkB6RZ6uru76evrq9/FzMyGIOnxRuVVzgGIdMv+ZtIt8HMi4ijS5XHHkdY56V8PfRPpBpcBImJp/zXTXV0DAszMzPZQZQEQyTmkOycnR0T/7et9pNUOtwFjcln/bf5mZtYkVU0CL8w34EBar+VvJE2X1EH65qXvAmtJwz4A0/EXVpiZNVVVk8BLgRWSTgceIS3j+7ek2+NvjYh78lotq/N67POAGRXVxczMGqhqEngzaV31WofX7bM1r6E+F7g8IrZUURczM2tsWNcHyUEx2JdhmJlZhTzxamZWKAeAmVmhilkidsqi2xuWr19yfJNrYmY2MrgHYGZWKAeAmVmhHABmZoVyAJiZFcoBYGZWKAeAmVmhHABmZoVyAJiZFcoBYGZWKAeAmVmhHABmZoVyAJiZFcoBYGZWKAeAmVmhHABmZoVyAJiZFcoBYGZWKAeAmVmhKgsASRMkzZU0sapjmJnZnqskACSNB24DjgLuldQlaZmkNZIW1+w3oMzMzJqjqh7A4cB5EXEp8DXgWKAjImYCUyVNkzS/vqyiupiZWQOdVbxpRNwHIGk2qRcwAViRn14JzALe3qDs0fr3krQAWADQ3d1dRXXNzIpU5RyAgJOAzUAAG/JTm4BJwNgGZQNExNKI6ImInq6urqqqa2ZWnMoCIJJzgHXA0cCY/NS4fNxtDcrMzKxJqpoEXijplLx5ILCENMQDMB1YD6xtUGZmZk1SyRwAsBRYIel04BHgFuB+SZOBecAM0rDQ6royMzNrkqomgTcDc2vLJPXmsssjYstgZWZm1hxV9QAGyKGwYldlZmbWHJ54NTMrlAPAzKxQDgAzs0I5AMzMCuUAMDMrlAPAzKxQDgAzs0I5AMzMCuUAMDMrlAPAzKxQDgAzs0I5AMzMCuUAMDMrlAPAzKxQDgAzs0I5AMzMCuUAMDMrlAPAzKxQDgAzs0I5AMzMCuUAMDMrlAPAzKxQlQSApAMk3SlppaSbJY2W9ISkVfnnsLzfMklrJC2uoh5mZja4qnoA7weujIh3As8Ai4AbIqI3/zwsaT7QEREzgamSplVUFzMza6CSAIiIqyPi7rzZBfwMOEHSt/On/k6gF1iR91kJzGr0XpIWSOqT1Ldx48YqqmtmVqRK5wAkzQTGA3cDcyLiKGAUcBwwFtiQd90ETGr0HhGxNCJ6IqKnq6uryuqamRWls6o3ljQBuAp4L/BMRGzPT/UB04BtwJhcNg5PSJuZNVVVk8CjgRuB8yPiceB6SdMldQAnAt8F1rJz2Gc6sL6KupiZWWNV9QBOA44ELpB0AXAvcD0g4NaIuEfS/sBqSZOBecCMiupiZmYNVBIAEXENcE1d8cV1+2yV1AvMBS6PiC1V1MXMzBqrbA5gd0TEZnZeCWRmZk3kiVczs0I5AMzMCuUAMDMrlAPAzKxQDgAzs0I5AMzMCuUAMDMrlAPAzKxQDgAzs0I5AMzMCuUAMDMrlAPAzKxQDgAzs0I5AMzMCuUAMDMrlAPAzKxQDgAzs0I5AMzMCuUAMDMrlAPAzKxQDgAzs0I5AMzMClVJAEg6QNKdklZKulnSaEnLJK2RtLhmvwFlZmbWHFX1AN4PXBkR7wSeAU4GOiJiJjBV0jRJ8+vLKqqLmZk10FnFm0bE1TWbXcDvAZ/K2yuBWcDbgRV1ZY/Wv5ekBcACgO7u7iqqa2ZWpErnACTNBMYDTwIbcvEmYBIwtkHZABGxNCJ6IqKnq6uryuqamRWlsgCQNAG4CjgV2AaMyU+Ny8dtVGZmZk1S1STwaOBG4PyIeBxYSxriAZgOrB+kzMzMmqSSOQDgNOBI4AJJFwBfBD4gaTIwD5gBBLC6rszMzJqkqknga4Brassk3QrMBS6PiC25rLe+zMzMmqOqHsAAEbGZnVf9DFpmZmbNscdzAJKmSDpsb1bGzMyaZ5cBIOnvJF0n6V11T30SeGs11TIzs6rtTg9gAvAJaoaLJF0EPBURX6moXmZmVrHdCYAg3aE7UdIZklYAmyLij6qtmpmZVWnQSWBJRwO/ArwO2A84GHg9cCiwoym1MzOzygzVA/h30sl/DHAx8PmI+GhevG2cpE83o4JmZlYNRcSud5LmAscCF0bEi7nsS8AdEdG0yzh7enqir69vj147ZdHtu73v+iXH79ExzMxGIklrI6KnvnyX9wFIOhXojYhTJH1eUgDnA4tIQ0RmZtaChgwASRcChwBnAUTEGZKOBW4DlkfEVdVX0czMqjDoHIAkAWsj4gzgQElTJU0lLdp2GvC0pLObU00zM9vbdjUEdBbp0/6lwOaa8qdJ6/yvr6ZaZmZWtUF7AJFmh98g6R3A88BfAAcB20kBcH9ELG1KLc3MbK/bVQ9ApGWdf4F0OeiXgH2Bw4DTJD0SET+stIZmZlaJoW4E2wd4NiKuyvMBHyPdFSzgMeAMYBlQv0aQmZm1gKGGgHYAH5T02Yj4a2AacD1pbaBbIuIx4KKm1NLMzPa6Xa0F9G7gHZJOyfv+IvAd4OuS/ldErKm4fmZmVpFdBcALwCXAj9m5/s/3gA8DH5c0qcK6mZlZhXYVAI8DZwJ/AIwCRgOnAutIS0SfX2ntzMysMkNeBRQR3yBP8kr63YhYLulGUnDcBWyqvopmZlaF3f5O4IhYnn/X3hD2wF6vkZmZNcUefyewmZm1NgeAmVmhKg0ASZMkrc6POyU9IWlV/jksly+TtEbS4irrYmZmr1RZAEgaD1wLjM1FhwM3RERv/nlY0nygI3/L2FRJ06qqj5mZvVKVPYCXgJOArXl7BnCCpG/nT/2dQC/Q/41iK4FZ9W8iaYGkPkl9GzdurLC6ZmZlqSwAImJrRGypKXoQmBMRR5HuKTiO1DvYkJ/fBAy4sSwilkZET0T0dHV1VVVdM7Pi7PZloHvBuojYnh/3kdYW2kZaZRRgHJ6UNjNrmmaecK+XNF1SB3Ai8F1gLTuHfabjL5gxM2uaZvYALgGWk5aTvjUi7pG0P7Ba0mRgHmmewMzMmqDyAIiI3vz7EdKVQLXPbZXUC8wFLq+bMzAzswo1swfQUF5aYsUudzQzs73Kk65mZoUa9h7ASDRl0e0Ny9cvOb7JNTEzq457AGZmhXIAmJkVygFgZlYoB4CZWaEcAGZmhXIAmJkVygFgZlYoB4CZWaEcAGZmhXIAmJkVygFgZlYoB4CZWaEcAGZmhXIAmJkVygFgZlYoB4CZWaEcAGZmhXIAmJkVygFgZlaoSgNA0iRJq2u2l0laI2nxUGVmZla9ygJA0njgWmBs3p4PdETETGCqpGmNyqqqj5mZvVKVPYCXgJOArXm7F1iRH68EZg1S9gqSFkjqk9S3cePGCqtrZlaWygIgIrZGxJaaorHAhvx4EzBpkLL691kaET0R0dPV1VVVdc3MitPMSeBtwJj8eFw+dqMyMzNrgmaecNeyc4hnOrB+kDIzM2uCziYe6xZgtaTJwDxgBhANyszMrAkq7wFERG/+vZU06fsAcExEbGlUVnV9zMwsaWYPgIjYzM6rfgYtMzOz6nnS1cysUA4AM7NCOQDMzArlADAzK5QDwMysUA4AM7NCOQDMzArlADAzK5QDwMysUA4AM7NCOQDMzArlADAzK5QDwMysUA4AM7NCOQDMzArlADAzK1RTvxCm1U1ZdPuAsvVLjh+GmpiZvXbuAZiZFcoBYGZWKAeAmVmhHABmZoVyAJiZFappASCpU9ITklbln8MkLZO0RtLiZtXDzMySZvYADgduiIjeiOgFpgEdETETmCppWhPrYmZWvGbeBzADOEHSMcDDwHZgRX5uJTALeLT+RZIWAAsAuru7m1NTM7MCNLMH8CAwJyKOAkYB84AN+blNwKRGL4qIpRHRExE9XV1dzampmVkBmhkA6yLi6fy4D5gIjMnb45pcFzOz4jXzpHu9pOmSOoATgXNIwz4A04H1TayLmVnxmjkHcAmwHBBwK3ALsFrSZNJw0Iwm1sXMrHhNC4CIeIR0JdDLJPUCc4HLI2JLs+piZmbDvBpoRGxm55VAZmbWRF4OuiKNlo4GLx9tZiOHr7wxMyuUA8DMrFAOADOzQjkAzMwK5QAwMyuUrwIaAXzFkJkNB/cAzMwK5QAwMyuUA8DMrFAOADOzQjkAzMwK5QAwMyuULwNtssEu+TQzazb3AMzMCuUewGtU5Sd63yBmZlVyD8DMrFAOADOzQjkAzMwK5TmAFtRobsDzAmb2ajkA2tyrmUiuat92Umq7rT05AKwt+URttmvDHgCSlgFvBW6PiI8Pd31a1au9HLVdbkhrl3ZY+xrJH0aGNQAkzQc6ImKmpC9ImhYRjw5nnWygV3OSHcl/7INpxTqb7Q2KiOE7uPTXwF0RcYekk4ExEfHFun0WAAvy5puBH+zh4SYCz+1xZUe+dm8ftH8b3b7WN1Lb+MaI6KovHO4hoLHAhvx4E3Bk/Q4RsRRY+loPJKkvInpe6/uMVO3ePmj/Nrp9ra/V2jjc9wFsA8bkx+MY/vqYmRVjuE+4a4FZ+fF0YP3wVcXMrCzDPQR0C7Ba0mRgHjCjwmO95mGkEa7d2wft30a3r/W1VBuHdRIYQNJ4YC5wf0Q8M6yVMTMryLAHgJmZDY/hngMwK46kCZLmSpo43HWxshURAJKWSVojafFw12VvkDRJ0uqa7QHta9U2SzpA0p2SVkq6WdLoNmvfeOA24CjgXkld7dS+fvlv9KH8uG3aJ6lT0hOSVuWfw1q5fW0fALV3GwNTJU0b7jq9FvkEci3pHoqG7WvxNr8fuDIi3gk8A5xMe7XvcOC8iLgU+BpwLO3Vvn5XAGPa8O/zcOCGiOiNiF5gGi3cvuG+CqgZeoEV+fFK0mWnrbzcxEvAScA/5O1eBrbv7Q3KWqLNEXF1zWYX8HvAp/J2O7TvPgBJs0m9gAm00f8/AEnHAj8mBXgv7dW+GcAJko4BHga208Lta/seAAPvNp40jHV5zSJia0RsqSlq1L6Wb7OkmcB44EnarH2SRArxzUDQRu2TNBr4GLAoF7Xb3+eDwJyIOAoYRbp8vWXbV0IAtPvdxo3a19JtljQBuAo4lTZsXyTnAOuAo2mv9i0Cro6IH+Xtdvv/ty4ins6P+0hr/7Rs+0Zsxfaidr/buFH7WrbN+RPkjcD5EfE47de+hZJOyZsHAktoo/YBc4BzJK0CjgDeTXu173pJ0yV1ACcC59DC7SthDuAWmne38XC4hYHtiwZlreI00qKAF0i6APgi8IE2at9SYIWk04FHSP//7m+X9kXE7P7HOQTeQ3v9fV4CLAcE3EqL//sr4kawdr/buFH72qnNbp/bN5K1cvuKCAAzMxuohDkAMzNrwAFgZlYoB4CZWaEcAFY0SWdLOniQ55Rv2hrstR21z0vqrHk86OvMRgoHgJXut4H/BpD0BUn/JOlBSZcBM4H78qJf6yWtlfSfkr6TL3G8j3Ste7/PSOrNj39L0lWNDijp6zWPr5V06F5vldluKOE+ALOGJD1CWq9mraTPAB3A7wKHkm73/6ak64A3kf6tPAZMBX4IPAF8JyL6V7zcj3T/wh9KGgWcDTwr6ZiIuDfvI9KHrp/k3sIhwGzSPQ8T8vtd1qTmmzkArGjrSStybib9W3h9flz7ify9wFn5+buBY4Bvk9YoOhf4ct7vXOBfImJH/uR/NfBV4GZJXRGxgrT426eBtwF/D/wM+D5p+YS/It1UZNY0DgAr3QbgLcBPgf0jYqukLcB7JK0jrfi4LO97RP7dk39/HUDSL5GWsX5I0rWkYFkVEdslLSEtjdAZEcslXQF8krTW0UbSYmH3AndGxL9U21SzV/KNYFYkSWcDFwCrgG7SyfxbEXFXfv5I4EfAm4G3kpbhrtUBPBgR90v6NdKyzicB5wO/AYyKiC9K+mZEHC1JERGSlgNvIC0D8a+kxeDuIA0FjQU+ExHfqq7lZju5B2BFiohrJB0PfAXYl7Rq40cl/Qmwg7Sm+58Bx5HWdoF00oedYTBHUk9EXKn09Y4nRcTTeT7gybzP8/l4IWku8D1gf+BCUlh0Awvyvo8B/1FJg80acABYkfIkrIA3AvsB/wW8CLw3Il6QdE9EfBb4rKTfJ43Zvy+//EZgdkR8dZC3/wXgmw3KN5G+3GZmRGyS9CDwbERcIWkKsCQiNu6VBprtBl8GaqWaDfwbaRJ3OmkOQMAdku4BDpN0iKTbSFcB7Uv6tL4jPz5e0uqaewj2gfRduPm9B4znR8TaiPhxbRFwdr6k9Mv1+5tVzQFgpXoT6cvZtwM/IH3FZidwXETMyWVPAR+NiMXAQcDBpMna/4qIs4DTalZ63De//i+BhRGxI5fv3+DYr8u/RwHX5O+WPRn3yK3JPAlslkkaExE/acJxDoiILflLRYiI+glms6ZwAJiZFcpDQGZmhXIAmJkVygFgZlYoB4CZWaH+P1iOCptF/HPbAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# ④ \n",
    "df = titanic['fare'].sort_values(ascending=False) \n",
    "plt.figure() \n",
    "plt.hist(df, bins=(np.arange(0, 550, 10))) \n",
    "plt.xlabel('船票价格') \n",
    "plt.ylabel('频次') \n",
    "plt.title('船票价格直方图') \n",
    "plt.show() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "822b9b52",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ⑤ \n",
    "titanic['familysize'] = titanic.loc[: , 'SibSp'] + titanic.loc[: , 'Parch'] + 1 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "467bcb2b",
   "metadata": {},
   "outputs": [
    {
     "ename": "IntCastingNaNError",
     "evalue": "Cannot convert non-finite values (NA or inf) to integer",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mIntCastingNaNError\u001b[0m                        Traceback (most recent call last)",
      "Input \u001b[1;32mIn [76]\u001b[0m, in \u001b[0;36m<cell line: 2>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[38;5;66;03m# ⑥ \u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m titanic[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mSex\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mtitanic\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mSex\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap\u001b[49m\u001b[43m(\u001b[49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mfemale\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mmale\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mint\u001b[39;49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32mD:\\Anaconda\\envs\\myjupyter\\lib\\site-packages\\pandas\\core\\generic.py:5912\u001b[0m, in \u001b[0;36mNDFrame.astype\u001b[1;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[0;32m   5905\u001b[0m     results \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m   5906\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39miloc[:, i]\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy)\n\u001b[0;32m   5907\u001b[0m         \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns))\n\u001b[0;32m   5908\u001b[0m     ]\n\u001b[0;32m   5910\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m   5911\u001b[0m     \u001b[38;5;66;03m# else, only a single dtype is given\u001b[39;00m\n\u001b[1;32m-> 5912\u001b[0m     new_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_mgr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   5913\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_constructor(new_data)\u001b[38;5;241m.\u001b[39m__finalize__(\u001b[38;5;28mself\u001b[39m, method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mastype\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m   5915\u001b[0m \u001b[38;5;66;03m# GH 33113: handle empty frame or series\u001b[39;00m\n",
      "File \u001b[1;32mD:\\Anaconda\\envs\\myjupyter\\lib\\site-packages\\pandas\\core\\internals\\managers.py:419\u001b[0m, in \u001b[0;36mBaseBlockManager.astype\u001b[1;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[0;32m    418\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mastype\u001b[39m(\u001b[38;5;28mself\u001b[39m: T, dtype, copy: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m, errors: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T:\n\u001b[1;32m--> 419\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mastype\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32mD:\\Anaconda\\envs\\myjupyter\\lib\\site-packages\\pandas\\core\\internals\\managers.py:304\u001b[0m, in \u001b[0;36mBaseBlockManager.apply\u001b[1;34m(self, f, align_keys, ignore_failures, **kwargs)\u001b[0m\n\u001b[0;32m    302\u001b[0m         applied \u001b[38;5;241m=\u001b[39m b\u001b[38;5;241m.\u001b[39mapply(f, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m    303\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 304\u001b[0m         applied \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    305\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mNotImplementedError\u001b[39;00m):\n\u001b[0;32m    306\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m ignore_failures:\n",
      "File \u001b[1;32mD:\\Anaconda\\envs\\myjupyter\\lib\\site-packages\\pandas\\core\\internals\\blocks.py:580\u001b[0m, in \u001b[0;36mBlock.astype\u001b[1;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[0;32m    562\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m    563\u001b[0m \u001b[38;5;124;03mCoerce to the new dtype.\u001b[39;00m\n\u001b[0;32m    564\u001b[0m \n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    576\u001b[0m \u001b[38;5;124;03mBlock\u001b[39;00m\n\u001b[0;32m    577\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m    578\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvalues\n\u001b[1;32m--> 580\u001b[0m new_values \u001b[38;5;241m=\u001b[39m \u001b[43mastype_array_safe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    582\u001b[0m new_values \u001b[38;5;241m=\u001b[39m maybe_coerce_values(new_values)\n\u001b[0;32m    583\u001b[0m newb \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmake_block(new_values)\n",
      "File \u001b[1;32mD:\\Anaconda\\envs\\myjupyter\\lib\\site-packages\\pandas\\core\\dtypes\\cast.py:1292\u001b[0m, in \u001b[0;36mastype_array_safe\u001b[1;34m(values, dtype, copy, errors)\u001b[0m\n\u001b[0;32m   1289\u001b[0m     dtype \u001b[38;5;241m=\u001b[39m dtype\u001b[38;5;241m.\u001b[39mnumpy_dtype\n\u001b[0;32m   1291\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 1292\u001b[0m     new_values \u001b[38;5;241m=\u001b[39m \u001b[43mastype_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   1293\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m):\n\u001b[0;32m   1294\u001b[0m     \u001b[38;5;66;03m# e.g. astype_nansafe can fail on object-dtype of strings\u001b[39;00m\n\u001b[0;32m   1295\u001b[0m     \u001b[38;5;66;03m#  trying to convert to float\u001b[39;00m\n\u001b[0;32m   1296\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m errors \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n",
      "File \u001b[1;32mD:\\Anaconda\\envs\\myjupyter\\lib\\site-packages\\pandas\\core\\dtypes\\cast.py:1237\u001b[0m, in \u001b[0;36mastype_array\u001b[1;34m(values, dtype, copy)\u001b[0m\n\u001b[0;32m   1234\u001b[0m     values \u001b[38;5;241m=\u001b[39m values\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy)\n\u001b[0;32m   1236\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1237\u001b[0m     values \u001b[38;5;241m=\u001b[39m \u001b[43mastype_nansafe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   1239\u001b[0m \u001b[38;5;66;03m# in pandas we don't store numpy str dtypes, so convert to object\u001b[39;00m\n\u001b[0;32m   1240\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(dtype, np\u001b[38;5;241m.\u001b[39mdtype) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(values\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;241m.\u001b[39mtype, \u001b[38;5;28mstr\u001b[39m):\n",
      "File \u001b[1;32mD:\\Anaconda\\envs\\myjupyter\\lib\\site-packages\\pandas\\core\\dtypes\\cast.py:1148\u001b[0m, in \u001b[0;36mastype_nansafe\u001b[1;34m(arr, dtype, copy, skipna)\u001b[0m\n\u001b[0;32m   1145\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcannot astype a timedelta from [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00marr\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m] to [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m]\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m   1147\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m np\u001b[38;5;241m.\u001b[39missubdtype(arr\u001b[38;5;241m.\u001b[39mdtype, np\u001b[38;5;241m.\u001b[39mfloating) \u001b[38;5;129;01mand\u001b[39;00m np\u001b[38;5;241m.\u001b[39missubdtype(dtype, np\u001b[38;5;241m.\u001b[39minteger):\n\u001b[1;32m-> 1148\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mastype_float_to_int_nansafe\u001b[49m\u001b[43m(\u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   1150\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m is_object_dtype(arr\u001b[38;5;241m.\u001b[39mdtype):\n\u001b[0;32m   1151\u001b[0m \n\u001b[0;32m   1152\u001b[0m     \u001b[38;5;66;03m# work around NumPy brokenness, #1987\u001b[39;00m\n\u001b[0;32m   1153\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m np\u001b[38;5;241m.\u001b[39missubdtype(dtype\u001b[38;5;241m.\u001b[39mtype, np\u001b[38;5;241m.\u001b[39minteger):\n",
      "File \u001b[1;32mD:\\Anaconda\\envs\\myjupyter\\lib\\site-packages\\pandas\\core\\dtypes\\cast.py:1193\u001b[0m, in \u001b[0;36mastype_float_to_int_nansafe\u001b[1;34m(values, dtype, copy)\u001b[0m\n\u001b[0;32m   1189\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m   1190\u001b[0m \u001b[38;5;124;03mastype with a check preventing converting NaN to an meaningless integer value.\u001b[39;00m\n\u001b[0;32m   1191\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m   1192\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m np\u001b[38;5;241m.\u001b[39misfinite(values)\u001b[38;5;241m.\u001b[39mall():\n\u001b[1;32m-> 1193\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m IntCastingNaNError(\n\u001b[0;32m   1194\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot convert non-finite values (NA or inf) to integer\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   1195\u001b[0m     )\n\u001b[0;32m   1196\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m values\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy)\n",
      "\u001b[1;31mIntCastingNaNError\u001b[0m: Cannot convert non-finite values (NA or inf) to integer"
     ]
    }
   ],
   "source": [
    "# ⑥ \n",
    "titanic['Sex'] = titanic['Sex'].map({'female':0, 'male':1}).astype(int) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "dff5cd31",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ⑦ \n",
    "from sklearn.model_selection import train_test_split \n",
    "x = titanic.loc[:, ['Pclass', 'Sex', 'familysize']] \n",
    "y = titanic.loc[:, 'Survived'] \n",
    "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42) \n",
    "# ⑧ \n",
    "from sklearn import neighbors \n",
    "clf = neighbors.KNeighborsClassifier(n_neighbors=20) \n",
    "clf.fit(x_train, y_train) # KNN训练 \n",
    "# ⑨ \n",
    "pre = clf.predict(x_test) # 模型预测 \n",
    "# ⑩ \n",
    "from sklearn.metrics import classification_report \n",
    "res = classification_report(y_test, pre) # 模型评估 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6f8dd2d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "86c570e3",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2464493b",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
